diff options
author | 2018-06-18 09:57:19 -0700 | |
---|---|---|
committer | 2018-06-18 09:59:59 -0700 | |
commit | e80732c9895d1283af9b98d6277ad1a1015e2e9a (patch) | |
tree | 14895657394f9cdfed8435460e37fe89a45ba599 /tensorflow | |
parent | 8ecf506fb8464dd273ce59f512f5e20d37dd5cfd (diff) |
Merge changes from github.
PiperOrigin-RevId: 201011811
Diffstat (limited to 'tensorflow')
222 files changed, 3241 insertions, 894 deletions
diff --git a/tensorflow/BUILD b/tensorflow/BUILD index a73c4ca3aa..6d134dbb80 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -475,7 +475,7 @@ tf_cc_shared_object( # excludes all but a subset of function names. # On MacOS, the linker does not support version_script, but has an # an "-exported_symbols_list" command. -z defs disallows undefined -# symbols in object files and -s strips the output. +# symbols in object files. tf_cc_shared_object( name = "libtensorflow.so", @@ -489,7 +489,6 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", - "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow/c:version_script.lds)", ], @@ -515,7 +514,6 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", - "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow:tf_version_script.lds)", ], diff --git a/tensorflow/c/generate-pc.sh b/tensorflow/c/generate-pc.sh index 02a6a58b61..7184ad68fb 100755 --- a/tensorflow/c/generate-pc.sh +++ b/tensorflow/c/generate-pc.sh @@ -15,10 +15,12 @@ # ============================================================================== TF_PREFIX='/usr/local' +LIBDIR='lib' usage() { echo "Usage: $0 OPTIONS" echo -e "-p, --prefix\tset installation prefix (default: /usr/local)" + echo -e "-l, --libdir\tset lib directory (default: lib)" echo -e "-v, --version\tset TensorFlow version" echo -e "-h, --help\tdisplay this message" } @@ -26,7 +28,7 @@ usage() { [ $# == 0 ] && usage && exit 0 # read the options -ARGS=$(getopt -o p:v:h --long prefix:,version:,help -n $0 -- "$@") +ARGS=$(getopt -o p:l:v:h --long prefix:,libdir:,version:,help -n $0 -- "$@") eval set -- "$ARGS" # extract options and their arguments into variables. 
@@ -38,6 +40,11 @@ while true ; do "") shift 2 ;; *) TF_PREFIX=$2 ; shift 2 ;; esac ;; + -l|--libdir) + case "$2" in + "") shift 2 ;; + *) LIBDIR=$2 ; shift 2 ;; + esac ;; -v|--version) case "$2" in "") shift 2 ;; @@ -55,7 +62,7 @@ echo "Generating pkgconfig file for TensorFlow $TF_VERSION in $TF_PREFIX" cat << EOF > tensorflow.pc prefix=${TF_PREFIX} exec_prefix=\${prefix} -libdir=\${exec_prefix}/lib +libdir=\${exec_prefix}/${LIBDIR} includedir=\${prefix}/include Name: TensorFlow diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index 52c177212a..35a01e0341 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -38,6 +38,7 @@ REGISTER_NO_GRADIENT_OP("NotEqual"); REGISTER_NO_GRADIENT_OP("LogicalAnd"); REGISTER_NO_GRADIENT_OP("LogicalOr"); REGISTER_NO_GRADIENT_OP("LogicalNot"); +REGISTER_NO_GRADIENT_OP("Floor"); // Conjugate helper function returns the conjugate of an Output if it // is complex valued. diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 0cb3132e94..c73482d5f4 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -255,6 +255,53 @@ Status LRNGradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("LRN", LRNGradHelper); +Status SoftplusGradHelper(const Scope& scope, const Operation& op, + const std::vector<Output>& grad_inputs, + std::vector<Output>* grad_outputs) { + auto dx = internal::SoftplusGrad(scope, grad_inputs[0], op.input(0)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("Softplus", SoftplusGradHelper); + +Status SoftsignGradHelper(const Scope& scope, const Operation& op, + const std::vector<Output>& grad_inputs, + std::vector<Output>* grad_outputs) { + auto dx = internal::SoftsignGrad(scope, grad_inputs[0], op.input(0)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("Softsign", SoftsignGradHelper); + 
+Status FractionalAvgPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector<Output>& grad_inputs, + std::vector<Output>* grad_outputs) { + bool overlapping; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); + auto dx = internal::FractionalAvgPoolGrad( + scope, Shape(scope, op.input(0), Shape::OutType(DT_INT64)), + grad_inputs[0], op.output(1), op.output(2), + internal::FractionalAvgPoolGrad::Overlapping(overlapping)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("FractionalAvgPool", FractionalAvgPoolGradHelper); + +Status FractionalMaxPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector<Output>& grad_inputs, + std::vector<Output>* grad_outputs) { + bool overlapping; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); + auto dx = internal::FractionalMaxPoolGrad( + scope, op.input(0), op.output(0), grad_inputs[0], op.output(1), + op.output(2), internal::FractionalMaxPoolGrad::Overlapping(overlapping)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("FractionalMaxPool", FractionalMaxPoolGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index c4eba7ecb0..b4d457a9d1 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -28,6 +28,8 @@ namespace { using ops::BiasAdd; using ops::Conv2D; using ops::Elu; +using ops::FractionalAvgPool; +using ops::FractionalMaxPool; using ops::L2Loss; using ops::LogSoftmax; using ops::LRN; @@ -41,6 +43,8 @@ using ops::Relu; using ops::Relu6; using ops::Selu; using ops::Softmax; +using ops::Softplus; +using ops::Softsign; class NNGradTest : public ::testing::Test { protected: @@ -71,22 +75,30 @@ class NNGradTest : public ::testing::Test { EXPECT_LT(max_error, 1e-3); 
} - // Sets tensor with random values, ensuring that the max value is largest by - // a reasonable amount. - // This is an issue for MaxPool, MaxPoolV2 and MaxPool3D, in which - // perturbations by the numeric gradient computation in the gradient checker - // can change the max value if values are too close together. + // Sets tensor with random values, ensuring that every pair of elements are at + // least a reasonable amount apart. + // This is an issue for max pooling operations, in which perturbations by the + // numeric gradient computation in the gradient checker can change the max + // value if a pool has values that are too close together. template <typename T> - void SetRandomValuesWithBumpedMax(Tensor* tensor) { + void SetRandomValuesForMaxPooling(Tensor* tensor) { auto tensor_flat = tensor->flat<T>(); - tensor_flat.setRandom(); - int32 max_index = 0; - for (size_t i = 1; i < tensor->NumElements(); i++) { - if (tensor_flat(i) > tensor_flat(max_index)) { - max_index = i; - } + // First set the array to an increasing sequence of values spaced + // a reasonable amount apart + T cur = 0; + for (size_t i = 0; i < tensor->NumElements(); i++) { + tensor_flat(i) = cur; + cur += 5e-2; + } + // Fischer-Yates shuffle the array + for (size_t i = tensor->NumElements() - 1; i >= 1; i--) { + // j <- random integer 0 <= j <= i + size_t j = random::New64() % (i + 1); + // swap values at i, j + T tmp = tensor_flat(i); + tensor_flat(i) = tensor_flat(j); + tensor_flat(j) = tmp; } - tensor_flat(max_index) += 1e-2; } Scope scope_; @@ -189,7 +201,7 @@ TEST_F(NNGradTest, MaxPoolGradHelper) { const std::vector<int> strides{1, 2, 2, 1}; auto y = MaxPool(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax<float>(&x_init_value); + SetRandomValuesForMaxPooling<float>(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -202,7 +214,7 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) { Tensor strides = 
test::AsTensor<int>({1, 2, 2, 1}, {4}); auto y = MaxPoolV2(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax<float>(&x_init_value); + SetRandomValuesForMaxPooling<float>(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -215,7 +227,7 @@ TEST_F(NNGradTest, MaxPool3DGradHelper) { const std::vector<int> strides{1, 3, 3, 3, 1}; auto y = MaxPool3D(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax<float>(&x_init_value); + SetRandomValuesForMaxPooling<float>(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -248,5 +260,45 @@ TEST_F(NNGradTest, LRN){ RunTest(x, x_shape, y, x_shape); } +TEST_F(NNGradTest, SoftplusGrad) { + TensorShape shape({3, 7}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = Softplus(scope_, x); + RunTest(x, shape, y, shape); +} + +TEST_F(NNGradTest, SoftsignGrad) { + TensorShape shape({3, 7}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = Softsign(scope_, x); + RunTest(x, shape, y, shape); +} + +TEST_F(NNGradTest, FractionalAvgPoolGradHelper) { + TensorShape x_shape({1, 3, 7, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Force consistent pooling regions for unit testing. + auto y = FractionalAvgPool( + scope_, x, {1, 1.2, 1.9, 1}, + FractionalAvgPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( + 2)); + TensorShape y_shape({1, 2, 3, 1}); + RunTest(x, x_shape, y.output, y_shape); +} + +TEST_F(NNGradTest, FractionalMaxPoolGradHelper) { + TensorShape x_shape({1, 3, 7, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Force consistent pooling regions for unit testing. 
+ auto y = FractionalMaxPool( + scope_, x, {1, 1.2, 1.9, 1}, + FractionalMaxPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( + 2)); + Tensor x_init_value = Tensor(DT_FLOAT, x_shape); + SetRandomValuesForMaxPooling<float>(&x_init_value); + TensorShape y_shape({1, 2, 3, 1}); + RunTest(x, x_init_value, y.output, y_shape); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden index 6e050cf564..6641d45e83 100644 --- a/tensorflow/compiler/aot/codegen_test_h.golden +++ b/tensorflow/compiler/aot/codegen_test_h.golden @@ -56,9 +56,9 @@ namespace bar { // // Memory stats: // arg bytes total: 104 -// arg bytes aligned: 128 +// arg bytes aligned: 192 // temp bytes total: 126 -// temp bytes aligned: 224 +// temp bytes aligned: 320 class MyClass : public tensorflow::XlaCompiledCpuFunction { public: // Number of input arguments for the compiled computation. diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.h b/tensorflow/compiler/aot/embedded_protocol_buffers.h index ebfe4806c2..4e194a6aba 100644 --- a/tensorflow/compiler/aot/embedded_protocol_buffers.h +++ b/tensorflow/compiler/aot/embedded_protocol_buffers.h @@ -71,7 +71,7 @@ struct ProtobufToEmbed { const ::tensorflow::protobuf::MessageLite* message; }; -// Embeds a a sequence of protocol buffers into an object file. +// Embeds a sequence of protocol buffers into an object file. // // `target_triple` is the target triple for the target architecture for the // generated object file. diff --git a/tensorflow/compiler/aot/runtime.h b/tensorflow/compiler/aot/runtime.h index d085864f00..d1a669ceb1 100644 --- a/tensorflow/compiler/aot/runtime.h +++ b/tensorflow/compiler/aot/runtime.h @@ -25,8 +25,8 @@ namespace tensorflow { namespace tfcompile { namespace runtime { -// Align to 32-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. 
-static constexpr size_t kAlign = 32; +// Align to 64-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. +static constexpr size_t kAlign = 64; // aligned_buffer_bytes returns the sum of each size in `sizes`, skipping -1 // values. There are `n` entries in `sizes`. Each buffer is aligned to kAlign diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc index 6d603a02eb..06ec623eb2 100644 --- a/tensorflow/compiler/aot/runtime_test.cc +++ b/tensorflow/compiler/aot/runtime_test.cc @@ -24,7 +24,7 @@ namespace runtime { namespace { TEST(Runtime, AlignmentValue) { - // We've chosen 32 byte alignment for the tfcompile runtime to mimic the + // We've chosen 64 byte alignment for the tfcompile runtime to mimic the // regular tensorflow allocator, which was chosen to play nicely with Eigen. // The tfcompile runtime also has a requirement that comes from the xla // generated code, on the relation: buffer_size >= 16 ? 2 * sizeof(void*) : 8 @@ -39,13 +39,13 @@ TEST(Runtime, AlignedBufferBytes) { EXPECT_EQ(aligned_buffer_bytes(sizesA, 1), 0); static constexpr intptr_t sizesB[1] = {3}; - EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 32); + EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 64); static constexpr intptr_t sizesC[1] = {32}; - EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 32); + EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 64); static constexpr intptr_t sizesD[7] = {1, -1, 32, -1, 64, 2, 3}; - EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 192); + EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 320); } void* add_ptr(void* base, uintptr_t delta) { @@ -101,11 +101,11 @@ TEST(Runtime, MallocFreeContiguousBuffers) { EXPECT_NE(base, nullptr); EXPECT_EQ(bufD[0], add_ptr(base, 0)); EXPECT_EQ(bufD[1], nullptr); - EXPECT_EQ(bufD[2], add_ptr(base, 32)); + EXPECT_EQ(bufD[2], add_ptr(base, 64)); EXPECT_EQ(bufD[3], nullptr); - EXPECT_EQ(bufD[4], add_ptr(base, 64)); - EXPECT_EQ(bufD[5], add_ptr(base, 128)); - EXPECT_EQ(bufD[6], add_ptr(base, 160)); + 
EXPECT_EQ(bufD[4], add_ptr(base, 128)); + EXPECT_EQ(bufD[5], add_ptr(base, 192)); + EXPECT_EQ(bufD[6], add_ptr(base, 256)); for (int i = 0; i < 7; ++i) { const intptr_t size = sizesD[i]; if (size != -1) { diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index d82922a359..1067b38f93 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -178,6 +178,7 @@ cc_library( ":runtime_matmul", ":runtime_matmul_mkl", ":runtime_single_threaded_conv2d", + ":runtime_single_threaded_fft", ":runtime_single_threaded_matmul", "@llvm//:execution_engine", "@llvm//:core", @@ -516,7 +517,6 @@ cc_library( deps = [ "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/core:framework", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], @@ -579,6 +579,22 @@ cc_library( ) cc_library( + name = "runtime_single_threaded_fft", + srcs = [ + "runtime_fft_impl.h", + "runtime_single_threaded_fft.cc", + ], + hdrs = ["runtime_single_threaded_fft.h"], + copts = runtime_copts(), + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:framework_lite", + "//third_party/eigen3", + ], +) + +cc_library( name = "runtime_single_threaded_matmul", srcs = ["runtime_single_threaded_matmul.cc"], hdrs = ["runtime_single_threaded_matmul.h"], diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 215405f680..54c52bc08f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -51,6 +51,8 @@ extern const char* const kEigenConvF16SymbolName = extern const char* const kEigenConvF32SymbolName = "__xla_cpu_runtime_EigenConvF32"; extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft"; +extern const char* const kEigenSingleThreadedFftSymbolName 
= + "__xla_cpu_runtime_EigenSingleThreadedFft"; extern const char* const kEigenSingleThreadedMatMulF16SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF16"; extern const char* const kEigenSingleThreadedMatMulF32SymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index 1dce6efa5c..aa0e967123 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -52,6 +52,7 @@ extern const char* const kMKLSingleThreadedMatMulF64SymbolName; extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; +extern const char* const kEigenSingleThreadedFftSymbolName; extern const char* const kEigenSingleThreadedMatMulF16SymbolName; extern const char* const kEigenSingleThreadedMatMulF32SymbolName; extern const char* const kEigenSingleThreadedMatMulF64SymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 2c20be155f..758b8c62b4 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1172,7 +1172,13 @@ Status IrEmitter::HandleFft(HloInstruction* fft) { {int8_ptr_type, int8_ptr_type, int8_ptr_type, int32_type, int32_type, int64_type, int64_type, int64_type, int64_type}, /*isVarArg=*/false); - const char* fn_name = runtime::kEigenFftSymbolName; + + bool multi_threaded_eigen = + hlo_module_config_.debug_options().xla_cpu_multi_thread_eigen(); + const char* fn_name = multi_threaded_eigen + ? 
runtime::kEigenFftSymbolName + : runtime::kEigenSingleThreadedFftSymbolName; + llvm::Function* fft_func = llvm::cast<llvm::Function>( module_->getOrInsertFunction(fn_name, fft_type)); fft_func->setCallingConv(llvm::CallingConv::C); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h index 984cb0616e..0bf693edd0 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h @@ -21,8 +21,6 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/numeric_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/types.h" // 'tensorflow' namespace is used so that int64 and other types don't require @@ -71,11 +69,9 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, in_dims[0] = input_batch; Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> out_dims; out_dims[0] = input_batch; - TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = fft_shape[i]; out_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; - temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap<Eigen::Tensor<float, FFTRank + 1, Eigen::RowMajor>, Eigen::Aligned> @@ -88,8 +84,8 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank); // Compute the full FFT using a temporary tensor. 
- Tensor temp(DataTypeToEnum<complex64>::v(), temp_shape); - auto full_fft = temp.flat_inner_dims<complex64, FFTRank + 1>(); + Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor> full_fft(in_dims); + const Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> zero_start_indices; full_fft.device(device) = input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(axes); @@ -112,11 +108,9 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, in_dims[0] = input_batch; Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> out_dims; out_dims[0] = input_batch; - TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; out_dims[i + 1] = fft_shape[i]; - temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap<Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor>, Eigen::Aligned> @@ -129,8 +123,7 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, // region we will slice from input given fft_shape. We slice input to // fft_shape on its inner-most dimensions, except the last (which we // slice to fft_shape[-1] / 2 + 1). - Tensor temp(DataTypeToEnum<complex64>::v(), temp_shape); - auto full_fft = temp.flat_inner_dims<complex64, FFTRank + 1>(); + Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor> full_fft(out_dims); // Calculate the starting point and range of the source of // negative frequency part. 
@@ -179,7 +172,6 @@ template <int FFTRank, typename EigenDevice> void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, int32 fft_type, int64 input_batch, int64 fft_length0, int64 fft_length1, int64 fft_length2) { - CHECK(::xla::FftType_IsValid(fft_type)) << fft_type; switch (fft_type) { case ::xla::FftType::FFT: EigenFftC2C<true, FFTRank, EigenDevice>( @@ -204,7 +196,8 @@ void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, input_batch, fft_length0, fft_length1, fft_length2); break; default: - LOG(FATAL) << "Unsupported FFT type: " << fft_type; + // Unsupported FFT type + abort(); } } @@ -230,7 +223,8 @@ void EigenFftImpl(const EigenDevice& device, void* out, void* operand, fft_length1, fft_length2); break; default: - LOG(FATAL) << "Unsupported FFT rank " << fft_rank; + // Unsupported FFT rank + abort(); } } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc new file mode 100644 index 0000000000..2613ddb127 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc @@ -0,0 +1,32 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" + +#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h" +#include "tensorflow/core/platform/dynamic_annotations.h" +#include "tensorflow/core/platform/types.h" + +using tensorflow::int32; +using tensorflow::int64; + +TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedFft( + const void* run_options_ptr, void* out, void* operand, int32 fft_type, + int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1, + int64 fft_length2) { + tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand, fft_type, + fft_rank, input_batch, fft_length0, fft_length1, + fft_length2); +} diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h new file mode 100644 index 0000000000..dcd133d012 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h @@ -0,0 +1,31 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ + +#include "tensorflow/core/platform/types.h" + +extern "C" { + +extern void __xla_cpu_runtime_EigenSingleThreadedFft( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, void* out, + void* operand, tensorflow::int32 fft_type, tensorflow::int32 fft_rank, + tensorflow::int64 input_batch, tensorflow::int64 fft_length0, + tensorflow::int64 fft_length1, tensorflow::int64 fft_length2); + +} // extern "C" + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 8d8c5e4c44..c4c90515ac 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -38,6 +38,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" +#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h" #include "tensorflow/compiler/xla/types.h" @@ -202,6 +203,7 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index d3bc47e61e..2515222cf2 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -204,7 +204,7 @@ class LayoutPattern { // Modifies the pattern to match only if the layout equals the given proto. // The layout must outlive the returned pattern. constexpr LayoutPattern<LayoutType, LayoutPatternEqualImpl<Impl>> EqualTo( - const Layout* layout) const { + const ::xla::Layout* layout) const { return LayoutPattern<LayoutType, LayoutPatternEqualImpl<Impl>>( LayoutPatternEqualImpl<Impl>(impl_, layout), matched_layout_); } diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index e536c8afbf..77bdcc9de0 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -30,10 +30,17 @@ limitations under the License. 
namespace xla { +TupleSimplifier::TupleSimplifier(bool exclude_entry_computation) : + exclude_entry_computation_(exclude_entry_computation) {} + StatusOr<bool> TupleSimplifier::Run(HloModule* module) { // Initially add all GTE and Tuple instructions to the worklist. std::queue<HloInstruction*> worklist; for (auto* computation : module->computations()) { + if (exclude_entry_computation_ && + computation == module->entry_computation()) { + continue; + } for (auto* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kTuple || instruction->opcode() == HloOpcode::kGetTupleElement) { diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.h b/tensorflow/compiler/xla/service/tuple_simplifier.h index e5e9b10b5b..7509501883 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.h +++ b/tensorflow/compiler/xla/service/tuple_simplifier.h @@ -27,13 +27,20 @@ namespace xla { // the module. class TupleSimplifier : public HloPassInterface { public: - TupleSimplifier() {} + TupleSimplifier() : TupleSimplifier(/*exclude_entry_computation=*/false) {} + explicit TupleSimplifier(bool exclude_entry_computation); ~TupleSimplifier() override {} tensorflow::StringPiece name() const override { return "tuple-simplifier"; } // Run tuple simplification on the given computation. Returns whether the // computation was changed. StatusOr<bool> Run(HloModule* module) override; + + private: + // When set, this pipeline stage will perform optimization of all computations + // apart from the module's entry computation. This is used by Graphcore's + // backend. 
+ bool exclude_entry_computation_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc index ca9ae91281..d3635eae81 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc @@ -42,6 +42,12 @@ class TupleSimplifierTest : public HloTestBase { TF_ASSERT_OK(changed_status.status()); EXPECT_EQ(change_expected, changed_status.ValueOrDie()); } + void Run(HloModule* module, bool change_expected, bool exclude_entry) { + TupleSimplifier simplifier(exclude_entry); + auto changed_status = simplifier.Run(module); + TF_ASSERT_OK(changed_status.status()); + EXPECT_EQ(change_expected, changed_status.ValueOrDie()); + } const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {}); const Shape tuple_shape_ = ShapeUtil::MakeTupleShape( @@ -211,5 +217,76 @@ TEST_F(TupleSimplifierTest, IncompatibleTuples) { EXPECT_THAT(computation->root_instruction(), tuple); } +TEST_F(TupleSimplifierTest, CanExcludeEntryComputation) { + // Verify that the root computation can be excluded + auto module = CreateNewModule(); + + HloInstruction* p0; + HloInstruction* p1; + HloComputation* c0; + HloComputation* c1; + HloComputation* entry; + + { + HloComputation::Builder builder(TestName() + "_1"); + p0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 1)); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 2)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); + + c0 = module->AddEmbeddedComputation(builder.Build()); + } + { + HloComputation::Builder builder(TestName() + "_2"); + p1 = 
builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 1)); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 2)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); + + c1 = module->AddEmbeddedComputation(builder.Build()); + } + { + HloComputation::Builder builder(TestName() + "_Entry"); + HloInstruction* tuple_param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* call0 = builder.AddInstruction( + HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c0)); + HloInstruction* call1 = builder.AddInstruction( + HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c1)); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, call0, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, call1, 1)); + HloInstruction* tuple0 = + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 0)); + HloInstruction* gte3 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 1)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte2, gte3})); + + entry = module->AddEntryComputation(builder.Build()); + } + + Run(module.get(), /*change_expected=*/true, /*exclude_entry=*/ true); + + EXPECT_THAT(c0->root_instruction(), p0); + EXPECT_THAT(c1->root_instruction(), p1); + EXPECT_THAT(entry->instruction_count(), 9); +} + } // namespace } // namespace xla diff --git a/tensorflow/contrib/autograph/__init__.py 
b/tensorflow/contrib/autograph/__init__.py index 637e49c082..dbdbad8f4c 100644 --- a/tensorflow/contrib/autograph/__init__.py +++ b/tensorflow/contrib/autograph/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function # TODO(mdan): Bring only the relevant symbols to the top level. from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph import operators from tensorflow.contrib.autograph.impl.api import convert from tensorflow.contrib.autograph.impl.api import converted_call from tensorflow.contrib.autograph.impl.api import do_not_convert @@ -43,6 +44,8 @@ _allowed_symbols = [ 'do_not_convert', 'to_code', 'to_graph', + # Overloaded operators + 'operators', # Special functions and directives 'set_element_type', 'set_loop_options', diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index bda5e26f43..2e0a2fcef4 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -37,13 +37,15 @@ add_dependencies( tf_core_lib tf_protos_cc) -add_library(tf_c_python_api OBJECT - "${tensorflow_source_dir}/tensorflow/c/python_api.cc" - "${tensorflow_source_dir}/tensorflow/c/python_api.h" -) -add_dependencies( - tf_c_python_api - tf_c - tf_core_lib - tf_core_framework - tf_protos_cc) +if(tensorflow_BUILD_PYTHON_BINDINGS) + add_library(tf_c_python_api OBJECT + "${tensorflow_source_dir}/tensorflow/c/python_api.cc" + "${tensorflow_source_dir}/tensorflow/c/python_api.h" + ) + add_dependencies( + tf_c_python_api + tf_c + tf_core_lib + tf_core_framework + tf_protos_cc) +endif() diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index f73da0b8ab..6c90cf398c 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -155,7 +155,7 @@ if (WIN32) set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.lib") endif() else (WIN32) - set (pywrap_tensorflow_lib 
"${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX}") endif (WIN32) add_custom_target(tf_extension_ops) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index a0c3ddd28b..9244604489 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -715,7 +715,7 @@ if(WIN32) endif() else() add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.so) endif() @@ -832,7 +832,6 @@ add_custom_command(TARGET tf_python_build_pip_package POST_BUILD add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/testing/python/framework/util_test.py ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/testing/python/framework/) - add_custom_command(TARGET tf_python_build_pip_package POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README ${CMAKE_CURRENT_BINARY_DIR}/tf_python/) diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py index cffe069aa3..4f957f1e0b 100644 --- a/tensorflow/contrib/cmake/tools/create_def_file.py +++ b/tensorflow/contrib/cmake/tools/create_def_file.py @@ -44,7 +44,8 @@ UNDNAME = "undname.exe" DUMPBIN = "dumpbin.exe" # Exclude if matched -EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") +EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::|Internal|" + r"python_op_gen_internal|grappler") # Include if matched 
before exclude INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" @@ -56,6 +57,10 @@ INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" r"tensorflow::ops::internal::Enter|" r"tensorflow::strings::internal::AppendPieces|" r"tensorflow::strings::internal::CatPieces|" + r"tensorflow::errors::Internal|" + r"tensorflow::Tensor::CopyFromInternal|" + r"tensorflow::kernel_factory::" + r"OpKernelRegistrar::InitInternal|" r"tensorflow::io::internal::JoinPathImpl") # Include if matched after exclude @@ -64,7 +69,7 @@ INCLUDE_RE = re.compile(r"^(TF_\w*)$|" r"tensorflow::|" r"functor::|" r"\?nsync_|" - r"perftools::gputools") + r"stream_executor::") # We want to identify data members explicitly in the DEF file, so that no one # can implicitly link against the DLL if they use one of the variables exported diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py index 45760a29ee..795f1993ba 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py @@ -151,16 +151,24 @@ class SinhArcsinhBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval(), rtol=1e-4, atol=0.) self.assertAllClose(x, bijector.inverse(y).eval(), rtol=1e-4, atol=0.) - # Do the numpy calculation in float128 to avoid inf/nan. - y_float128 = np.float128(y) - self.assertAllClose( - np.log(np.cosh( - np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( - y_float128**2 + 1)) - - np.log(tailweight), - bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), - rtol=1e-4, - atol=0.) + # On IBM PPC systems, longdouble (np.float128) is same as double except that it can have more precision. 
+ # Type double being of 8 bytes, can't hold square of max of float64 (which is also 8 bytes) and + # below test fails due to overflow error giving inf. So this check avoids that error by skipping square + # calculation and corresponding assert. + + if np.amax(y) <= np.sqrt(np.finfo(np.float128).max) and \ + np.fabs(np.amin(y)) <= np.sqrt(np.fabs(np.finfo(np.float128).min)): + + # Do the numpy calculation in float128 to avoid inf/nan. + y_float128 = np.float128(y) + self.assertAllClose( + np.log(np.cosh( + np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( + y_float128**2 + 1)) - + np.log(tailweight), + bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + rtol=1e-4, + atol=0.) self.assertAllClose( -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index d7909dd5a2..adf92c27ea 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -106,7 +106,8 @@ class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase): target_device=target, buffer_size=10, container="", - shared_name=_generate_shared_name("function_buffer_resource")) + shared_name=_generate_shared_name( + "contrib_eager_iterator_function_buffer_resource")) self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter( # pylint: disable=line-too-long handle=self._buffer_resource_handle, handle_device=self._device) diff --git a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb index 4fe3a0e3f3..5749f22ac5 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb @@ -68,7 +68,7 @@ "# simply construct the object. 
Most layers take as a first argument the number\n", "# of output dimensions / channels.\n", "layer = tf.keras.layers.Dense(100)\n", - "# The number of input dimensionss is often unnecessary, as it can be inferred\n", + "# The number of input dimensions is often unnecessary, as it can be inferred\n", "# the first time the layer is used, but it can be provided if you want to \n", "# specify it manually, which is useful in some complex models.\n", "layer = tf.keras.layers.Dense(10, input_shape=(None, 5))" @@ -267,7 +267,7 @@ " * `build`, where you know the shapes of the input tensors and can do the rest of the initialization\n", " * `call`, where you do the forward computation\n", "\n", - "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes requires to create the variables will need to be explicitly specified." + "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes required to create the variables will need to be explicitly specified." 
] }, { diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 84a413c791..05bcdac2ca 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -346,7 +346,8 @@ def sequence_numeric_column( key, shape=(1,), default_value=0., - dtype=dtypes.float32): + dtype=dtypes.float32, + normalizer_fn=None): """Returns a feature column that represents sequences of numeric data. Example: @@ -370,6 +371,12 @@ def sequence_numeric_column( default_value: A single value compatible with `dtype` that is used for padding the sparse data into a dense `Tensor`. dtype: The type of values. + normalizer_fn: If not `None`, a function that can be used to normalize the + value of the tensor after `default_value` is applied for parsing. + Normalizer function takes the input `Tensor` as its argument, and returns + the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that + even though the most common use case of this function is normalization, it + can be used for any kind of Tensorflow transformations. Returns: A `_SequenceNumericColumn`. @@ -383,12 +390,16 @@ def sequence_numeric_column( if not (dtype.is_integer or dtype.is_floating): raise ValueError('dtype must be convertible to float. ' 'dtype: {}, key: {}'.format(dtype, key)) + if normalizer_fn is not None and not callable(normalizer_fn): + raise TypeError( + 'normalizer_fn must be a callable. 
Given: {}'.format(normalizer_fn)) return _SequenceNumericColumn( key, shape=shape, default_value=default_value, - dtype=dtype) + dtype=dtype, + normalizer_fn=normalizer_fn) def _assert_all_equal_and_return(tensors, name=None): @@ -407,7 +418,7 @@ class _SequenceNumericColumn( fc._SequenceDenseColumn, collections.namedtuple( '_SequenceNumericColumn', - ['key', 'shape', 'default_value', 'dtype'])): + ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): """Represents sequences of numeric data.""" @property @@ -419,7 +430,10 @@ class _SequenceNumericColumn( return {self.key: parsing_ops.VarLenFeature(self.dtype)} def _transform_feature(self, inputs): - return inputs.get(self.key) + input_tensor = inputs.get(self.key) + if self.normalizer_fn is not None: + input_tensor = self.normalizer_fn(input_tensor) + return input_tensor @property def _variable_shape(self): diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index ee74cf56dc..45d7b74046 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import monitored_session @@ -947,6 +948,7 @@ class SequenceNumericColumnTest(test.TestCase): self.assertEqual((1,), a.shape) self.assertEqual(0., a.default_value) self.assertEqual(dtypes.float32, a.dtype) + self.assertIsNone(a.normalizer_fn) def test_shape_saved_as_tuple(self): a = sfc.sequence_numeric_column('aaa', shape=[1, 2]) @@ -965,6 +967,10 @@ class 
SequenceNumericColumnTest(test.TestCase): ValueError, 'dtype must be convertible to float'): sfc.sequence_numeric_column('aaa', dtype=dtypes.string) + def test_normalizer_fn_must_be_callable(self): + with self.assertRaisesRegexp(TypeError, 'must be a callable'): + sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable') + def test_get_sequence_dense_tensor(self): sparse_input = sparse_tensor.SparseTensorValue( # example 0, values [[0.], [1]] @@ -985,6 +991,41 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, dense_tensor.eval(session=sess)) + def test_get_sequence_dense_tensor_with_normalizer_fn(self): + + def _increment_two(input_sparse_tensor): + return sparse_ops.sparse_add( + input_sparse_tensor, + sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2)) + ) + + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + + # Before _increment_two: + # [[0.], [1.]], + # [[10.], [0.]], + # After _increment_two: + # [[2.], [1.]], + # [[10.], [2.]], + expected_dense_tensor = [ + [[2.], [1.]], + [[10.], [2.]], + ] + numeric_column = sfc.sequence_numeric_column( + 'aaa', normalizer_fn=_increment_two) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + def test_get_sequence_dense_tensor_with_shape(self): """Tests get_sequence_dense_tensor with shape !=(1,).""" sparse_input = sparse_tensor.SparseTensorValue( diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py index daba965a98..484ffee3e7 100644 --- a/tensorflow/contrib/ffmpeg/__init__.py +++ b/tensorflow/contrib/ffmpeg/__init__.py @@ -28,7 +28,6 @@ from __future__ import print_function from 
tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio -from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py index 020b5c99c6..b1b5126d9e 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py +++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py @@ -21,7 +21,6 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py -from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 10d1ecc738..dc49383c5c 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -119,14 +119,13 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.array_ops import broadcast_to from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['nest', 'broadcast_to'] +_allowed_symbols = ['nest'] 
_nest_allowed_symbols = [ 'assert_same_structure', 'is_sequence', diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index 65cb94b5a4..a955e21b72 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -301,8 +301,8 @@ class FusedConv2DBiasActivationTest(test.TestCase): conv = tensors[i] value = values[i] ref_value = ref_values[i] - print("expected = ", ref_value) - print("actual = ", value) + tf_logging.info("expected = ", ref_value) + tf_logging.info("actual = ", value) tol = 1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -843,7 +843,8 @@ class FusedConvInt8Tests(test.TestCase): vertical_stride, padding_type) output_width = CalculateConvolvedOutputDim(input_width, filter_width, horizontal_stride, padding_type) - print("output_height=", output_height, ", output_width=", output_width) + tf_logging.info("output_height=", output_height, ", output_width=", + output_width) side_input, _, _ = gen_array_ops.quantize_v2( random_ops.random_uniform( @@ -880,8 +881,8 @@ class FusedConvInt8Tests(test.TestCase): with self.test_session( use_gpu=True, config=NoMemoryOptimizationConfig()) as sess: actual_y, expected_y = sess.run([actual, expected]) - print("actual_y = ", actual_y) - print("expected_y = ", expected_y) + tf_logging.info("actual_y = ", actual_y) + tf_logging.info("expected_y = ", expected_y) self.assertTrue(np.array_equal(actual_y, expected_y)) def testFusedConvInt8(self): diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c index 6a5d982dc8..2e5c84704f 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c +++ 
b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c @@ -19,7 +19,7 @@ limitations under the License. #include "hexagon_controller.h" -#include <malloc.h> +#include <stdlib.h> #include <stdio.h> #include "adspmsgd.h" diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index 436c3e1d4c..840015a7fa 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -30,9 +30,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. -GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc index 106e3b0270..8b0ace96cc 100644 --- a/tensorflow/contrib/lite/examples/minimal/minimal.cc +++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc @@ -38,7 +38,7 @@ using namespace tflite; int main(int argc, char *argv[]) { if(argc != 2) { - fprintf(stderr, "Usage: %s <model>\n"); + fprintf(stderr, "minimal <tflite model>\n"); return 1; } const char* filename = argv[1]; diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index bb2e615eac..965273f0f0 100644 --- 
a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -128,7 +128,6 @@ TensorFlow operation not listed above are likely unsupported. Notably, the following common ops are not supported at the moment: * [tf.depth_to_space](https://www.tensorflow.org/api_docs/python/tf/depth_to_space) -* [tf.gather](https://www.tensorflow.org/api_docs/python/tf/gather) * [tf.image.resize_bilinear](https://www.tensorflow.org/api_docs/python/tf/image/resize_bilinear) * [tf.tanh](https://www.tensorflow.org/api_docs/python/tf/tanh) @@ -306,6 +305,19 @@ Options { } ``` +**GATHER** + +``` +Inputs { + 0: params tensor + 1: indices tensor + 2: axis tensor (optional) +} +Outputs { + 0: a tensor with same type as the params tensor. +} +``` + **GREATER** ``` diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md index 5efa70987e..26349347fa 100644 --- a/tensorflow/contrib/lite/java/ovic/README.md +++ b/tensorflow/contrib/lite/java/ovic/README.md @@ -2,7 +2,7 @@ This folder contains building code for track one of the [Low Power ImageNet Recognition Challenge workshop at CVPR 2018.](https://rebootingcomputing.ieee.org/home/sitemap/14-lpirc/80-low-power-image-recognition-challenge-lpirc-2018) -## Pre-requesits +## Pre-requisite Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK. @@ -49,7 +49,7 @@ Once you have a submission that follows the instructions from the [competition s You can call the validator binary below to verify that your model fits the format requirements. This often helps you to catch size mismatches (e.g. output should be [1, 1001] instead of [1,1,1,1001]). 
Let say the submission file is located at `/path/to/my_model.lite`, then call: ```sh -bazel build --cxxopt--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all +bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all bazel-bin/tensorflow/contrib/lite/java/ovic/ovic_validator /path/to/my_model.lite ``` diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index a2f192bbc2..1908f7fa6c 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1934,7 +1934,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // The quantization of the input, output arrays is as follows: // - The input activations are quantized as uint8 on the interval // [-1, 127/128]. -// The rationale for that is that that is the natural interval for output +// The rationale for that is that is the natural interval for output // activations (see next point) and these need to be concatenated together. // We could accommodate different ranges by re-scaling, but we empirically // found that setting the input activations range to be [-1, 127/128] in the @@ -1999,7 +1999,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // However, for a fixed-point implementation in 16-bit integers, using 5 // integer bits to represent the [-16, 16] range would leave only 11 // fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive -// representable values. Notice that that is higher than the +// representable values. Notice that is higher than the // worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. 
// Using [-8, 8] thus seems like the better compromise overall, enjoying // an increment of 2.4e-4 between representable values and a worst-case diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index 9400e757b9..fd90823425 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -55,7 +55,7 @@ class Interpreter(object): elif model_content and not model_path: self._interpreter = ( _interpreter_wrapper.InterpreterWrapper_CreateWrapperCPPFromBuffer( - model_content, len(model_content))) + model_content)) if not self._interpreter: raise ValueError( 'Failed to create model from {} bytes'.format(len(model_content))) diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index f705551fcb..b283551c45 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -397,9 +397,14 @@ InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromFile( } InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer( - const char* data, size_t len) { + PyObject* data) { + char * buf = nullptr; + Py_ssize_t length; + if (PY_TO_CPPSTRING(data, &buf, &length) == -1) { + return nullptr; + } std::unique_ptr<tflite::FlatBufferModel> model = - tflite::FlatBufferModel::BuildFromBuffer(data, len); + tflite::FlatBufferModel::BuildFromBuffer(buf, length); return model ? 
new InterpreterWrapper(std::move(model)) : nullptr; } diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index b0ed7c4559..cbeb53bee7 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -40,8 +40,7 @@ class InterpreterWrapper { static InterpreterWrapper* CreateWrapperCPPFromFile(const char* model_path); // SWIG caller takes ownership of pointer. - static InterpreterWrapper* CreateWrapperCPPFromBuffer(const char* data, - size_t len); + static InterpreterWrapper* CreateWrapperCPPFromBuffer(PyObject* data); ~InterpreterWrapper(); bool AllocateTensors(); diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 0913cd2c5c..88dda7290b 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -34,6 +34,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from six import PY3 + from google.protobuf import text_format as _text_format from google.protobuf.message import DecodeError from tensorflow.contrib.lite.python import lite_constants as constants @@ -54,6 +56,7 @@ from tensorflow.python.framework.importer import import_graph_def from tensorflow.python.ops.variables import global_variables_initializer from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import tag_constants +# from tensorflow.python.util.all_util import remove_undocumented class TocoConverter(object): @@ -203,6 +206,12 @@ class TocoConverter(object): except (_text_format.ParseError, DecodeError): try: print("Ignore 'tcmalloc: large alloc' warnings.") + + if not isinstance(file_content, str): + if PY3: + file_content = file_content.decode('utf-8') + else: + file_content = 
file_content.encode('utf-8') _text_format.Merge(file_content, graph_def) except (_text_format.ParseError, DecodeError): raise ValueError( @@ -382,3 +391,5 @@ def _freeze_graph(sess, output_tensors): output_arrays) else: return sess.graph_def + +# remove_undocumented(__name__) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index e33b430937..5c7fa09891 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -178,7 +178,7 @@ ArrayDataType ConvertDataType(tensorflow::DataType dtype) { else if (dtype == DT_STRING) return ArrayDataType::kString; else - LOG(INFO) << "Unsupported data type in placehoder op: " << dtype; + LOG(INFO) << "Unsupported data type in placeholder op: " << dtype; return ArrayDataType::kNone; } diff --git a/tensorflow/contrib/lite/toco/toco_port.cc b/tensorflow/contrib/lite/toco/toco_port.cc index 1b21c8bc60..de76fd4032 100644 --- a/tensorflow/contrib/lite/toco/toco_port.cc +++ b/tensorflow/contrib/lite/toco/toco_port.cc @@ -20,6 +20,12 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) +namespace std { +double round(double x) { return ::round(x); } +} // namespace std +#endif + namespace toco { namespace port { void CopyToBuffer(const string& src, char* dest) { diff --git a/tensorflow/contrib/lite/toco/toco_port.h b/tensorflow/contrib/lite/toco/toco_port.h index 5c019cb2bf..17f82b9dd7 100644 --- a/tensorflow/contrib/lite/toco/toco_port.h +++ b/tensorflow/contrib/lite/toco/toco_port.h @@ -34,6 +34,24 @@ limitations under the License. 
#define TFLITE_PROTO_NS google::protobuf #endif +#ifdef __ANDROID__ +#include <sstream> +namespace std { + +template <typename T> +std::string to_string(T value) +{ + std::ostringstream os ; + os << value ; + return os.str() ; +} + +#ifdef __ARM_ARCH_7A__ +double round(double x); +#endif +} +#endif + namespace toco { namespace port { diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index e8c6edd7ba..a28fc3a87f 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -270,7 +270,7 @@ for arch in $archs; do PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ - ../../platform/c++11/src/per_thread_waiter.cc \ + ../../platform/posix/src/per_thread_waiter.c \ ../../platform/c++11/src/yield.cc \ ../../platform/c++11/src/time_rep_timespec.cc \ ../../platform/c++11/src/nsync_panic.cc diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index eff9081e35..48953e2e38 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -27,9 +27,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. 
-GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 2ed99d50a4..a6be2084aa 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -2503,7 +2503,7 @@ def _compute_recall_at_precision(tp, fp, fn, precision, name): name: An optional variable_scope name. Returns: - The recall at a the given `precision`. + The recall at a given `precision`. """ precisions = math_ops.div(tp, tp + fp + _EPSILON) tf_index = math_ops.argmin( diff --git a/tensorflow/contrib/mpi_collectives/kernels/ring.h b/tensorflow/contrib/mpi_collectives/kernels/ring.h index 1d56d588bc..c001615d3f 100644 --- a/tensorflow/contrib/mpi_collectives/kernels/ring.h +++ b/tensorflow/contrib/mpi_collectives/kernels/ring.h @@ -129,7 +129,7 @@ cudaStream_t CudaStreamForMPI(); * has the fully accumulated Segment 1; and so on. The scatter-reduce is * complete. * - * Next, the allgather distributes these fully accumululated chunks across all + * Next, the allgather distributes these fully accumulated chunks across all * nodes. Communication proceeds in the same ring, once again in N-1 steps. At * the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i). 
* For example, at the first iteration, the following transfers will occur: diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 21bf3f5313..915e6504e1 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -224,8 +224,10 @@ class AdaMaxOptimizerTest(test.TestCase): var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), + rtol=1e-2) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), + rtol=1e-2) if use_resource: self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer.py b/tensorflow/contrib/opt/python/training/model_average_optimizer.py index a7c97a1da2..b6b10e500b 100644 --- a/tensorflow/contrib/opt/python/training/model_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/model_average_optimizer.py @@ -62,7 +62,7 @@ class ModelAverageCustomGetter(object): """ def __init__(self, worker_device): - """Create a new `ElasticAverageCustomGetter`. + """Create a new `ModelAverageCustomGetter`. Args: worker_device: String. Name of the `worker` job. 
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD index 6ca7fe8b6e..aad1ca04c5 100644 --- a/tensorflow/contrib/periodic_resample/BUILD +++ b/tensorflow/contrib/periodic_resample/BUILD @@ -6,12 +6,13 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", - "py_test", + "tf_cc_test", "tf_gen_op_libs", "tf_custom_op_library", "tf_custom_op_py_library", "tf_gen_op_wrapper_py", ) +load("//tensorflow:tensorflow.bzl", "py_test") cc_library( name = "all_ops", @@ -84,6 +85,23 @@ py_test( ":init_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradient_checker", + ], +) + +tf_cc_test( + name = "periodic_resample_op_cc_test", + size = "small", + srcs = [ + "ops/array_ops_test.cc", + ], + deps = [ + ":all_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_proto", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", ], ) diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc index e18923c8aa..514689cf45 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc @@ -22,4 +22,9 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU), PeriodicResampleOp); + +REGISTER_KERNEL_BUILDER(Name("PeriodicResampleOpGrad") + .Device(DEVICE_CPU), + PeriodicResampleOpGrad); + } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h index 3ab588c458..42fba81a5c 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h @@ -25,92 +25,202 @@ #include 
"tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/util/work_sharder.h" namespace { -template <class IndexVecT, class IndexT> -IndexT compute_input_index( - IndexVecT* target_dimensions, const IndexT& output_index, - const IndexVecT& original_dimensions, const int& adjustable_dimension, - const std::vector<tensorflow::int64>& dimension_ceiling, - const std::vector<tensorflow::int64>& cumulative_dimensions, IndexT* result, - std::vector<IndexT>* output_indices, const int& rank) { - *result = 0; - output_indices->clear(); +// Computes input tensor index for given output index during forward +// propagation through periodic_resample operation. +class InputIndexer { + public: + InputIndexer(const std::vector<tensorflow::int64>& output_dimensions, + const tensorflow::TensorShape& input_shape, + int adjustable_dimension) + : output_dimensions_(output_dimensions), + adjustable_dimension_(adjustable_dimension), + rank_(input_shape.dims()), + linear_output_index_(0), + linear_input_index_(0), + adjustable_dimension_carriage_sum_(0) { + auto input_dimensions = TensorShapeToVector(input_shape); + // factors by which input_dimensions increases/decreases w.r.t. 
+ // output_dimensions + dimension_ceiling_ = + ComputeDimensionCeiling(output_dimensions, input_dimensions); + cumulative_dimensions_ = ComputeCumulativeDimensions(); + + output_indices_.resize(output_dimensions_.size()); + input_indices_.resize(output_dimensions_.size()); + + // Compute index_factors + index_factors_.resize(rank_); + tensorflow::int64 last_index_factor = 1; + for (auto r = rank_ - 1; r >= 0; --r) { + index_factors_[r] = last_index_factor; + last_index_factor *= input_dimensions[r]; + } + } + + tensorflow::int64 linear_input_index() const { return linear_input_index_; } + + void MoveToOutputIndex(tensorflow::int64 output_index); + void IncrementOutputIndex(); + + private: + void RecomputeInputAdjustableDimensionIndex() { + tensorflow::int64 index = adjustable_dimension_carriage_sum_; + index *= output_dimensions_[adjustable_dimension_]; + index += output_indices_[adjustable_dimension_]; + input_indices_[adjustable_dimension_] = index; + } + + std::vector<tensorflow::int64> TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape); + + std::vector<tensorflow::int64> ComputeDimensionCeiling( + const std::vector<tensorflow::int64>& output_dimensions, + const std::vector<tensorflow::int64>& input_dimensions); + + std::vector<tensorflow::int64> ComputeCumulativeDimensions(); + + const std::vector<tensorflow::int64> output_dimensions_; + std::vector<tensorflow::int64> dimension_ceiling_; + std::vector<tensorflow::int64> index_factors_; + std::vector<tensorflow::int64> cumulative_dimensions_; + std::vector<tensorflow::int64> output_indices_; + std::vector<tensorflow::int64> input_indices_; + + const int adjustable_dimension_; + const int rank_; + tensorflow::int64 linear_output_index_; + tensorflow::int64 linear_input_index_; + tensorflow::int64 adjustable_dimension_carriage_sum_; +}; + +void InputIndexer::MoveToOutputIndex(tensorflow::int64 output_index) { + linear_output_index_ = output_index; + linear_input_index_ = 0; // un-rasterize the 
output index auto last_reduced_i = output_index; - for (auto r = rank - 1; r >= 0; --r) { - (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + output_indices_[r] = last_reduced_i % output_dimensions_[r]; last_reduced_i = - (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r]; + (last_reduced_i - output_indices_[r]) / output_dimensions_[r]; } + tensorflow::int64 carriage_sum = 0; + for (int qi = 0; qi < rank_; ++qi) { + if (qi == adjustable_dimension_) continue; + carriage_sum += cumulative_dimensions_[qi] * + (output_indices_[qi] % dimension_ceiling_[qi]); + } + adjustable_dimension_carriage_sum_ = carriage_sum; + // rasterize the input index - IndexT last_index_factor = 1; - for (auto r = rank - 1; r >= 0; --r) { - IndexT index = 0; - if (r != adjustable_dimension) - index = (*output_indices)[r] / dimension_ceiling[r]; - else { - for (int qi = 0; qi < rank; ++qi) { - if (qi == adjustable_dimension) continue; - index += cumulative_dimensions[qi] * - ((*output_indices)[qi] % dimension_ceiling[qi]); - } - index *= (*target_dimensions)[adjustable_dimension]; - index += (*output_indices)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + if (r != adjustable_dimension_) { + input_indices_[r] = output_indices_[r] / dimension_ceiling_[r]; + } else { + RecomputeInputAdjustableDimensionIndex(); } - *result += last_index_factor * index; - last_index_factor *= original_dimensions[r]; } + for (auto r = rank_ - 1; r >= 0; --r) { + linear_input_index_ += index_factors_[r] * input_indices_[r]; + } +} + +void InputIndexer::IncrementOutputIndex() { + linear_output_index_++; + for (auto r = rank_ - 1; r >= 0; --r) { + auto old_carriage_sum_increment = + cumulative_dimensions_[r] * + (output_indices_[r] % dimension_ceiling_[r]); + output_indices_[r] = (output_indices_[r] + 1) % output_dimensions_[r]; + if (r != adjustable_dimension_) { + auto new_input_index = output_indices_[r] / dimension_ceiling_[r]; + 
linear_input_index_ += + (new_input_index - input_indices_[r]) * index_factors_[r]; + + input_indices_[r] = new_input_index; + + auto new_carriage_sum_increment = + cumulative_dimensions_[r] * + (output_indices_[r] % dimension_ceiling_[r]); - return *result; + adjustable_dimension_carriage_sum_ = adjustable_dimension_carriage_sum_ - + old_carriage_sum_increment + + new_carriage_sum_increment; + } + + if (output_indices_[r] != 0) { + // No more carries to higher indices. + break; + } + } + auto old_adjustable_dimension_input_index = + input_indices_[adjustable_dimension_]; + RecomputeInputAdjustableDimensionIndex(); + linear_input_index_ += (input_indices_[adjustable_dimension_] - + old_adjustable_dimension_input_index) * + index_factors_[adjustable_dimension_]; } -template <class InputDataT, - class IndexVecT> // both types are needed here b/c IndexVecT and - // InputDataT are not related - void - fill_periodic_tensor( - tensorflow::OpKernelContext* context, - const IndexVecT& desired_shape, - const tensorflow::Tensor& input_tensor) { - // input is a strided array (last index is fastest, C-ordered) - auto input = input_tensor.flat<InputDataT>(); - const int rank = input_tensor.dims(); - // original and target dimensions - std::vector<tensorflow::int64> original_dimensions(rank), - target_dimensions(rank); - tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1); - // factors by which original_dimensions increases/decreases w.r.t. 
- // target_dimensions - std::vector<tensorflow::int64> dimension_ceiling(rank), - cumulative_dimensions(rank); - // index of adjustable dimension - int adjustable_dimension; - tensorflow::TensorShape output_shape; +std::vector<tensorflow::int64> InputIndexer::TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape) { + std::vector<tensorflow::int64> result(tensor_shape.dims()); + int count = 0; + for (const auto dim_info : tensor_shape) { + result[count] = dim_info.size; + ++count; + } + return result; +} - // requires that the rank of the input tensor and length of the desired shape - // are equal - OP_REQUIRES(context, rank == desired_shape.size(), - tensorflow::errors::InvalidArgument( - "periodic_resample expects the rank of the input tensor, ", - rank, ", to be the same as the length of the desired shape, ", - desired_shape.size(), ".")); +std::vector<tensorflow::int64> InputIndexer::ComputeDimensionCeiling( + const std::vector<tensorflow::int64>& output_dimensions, + const std::vector<tensorflow::int64>& input_dimensions) { + std::vector<tensorflow::int64> dimension_ceiling(input_dimensions.size()); + for (size_t i = 0; i < input_dimensions.size(); ++i) { + dimension_ceiling[i] = (output_dimensions[i] + input_dimensions[i] - 1) / + input_dimensions[i]; + } + return dimension_ceiling; +} - bool found = false; - const auto& input_tensor_shape = input_tensor.shape(); +std::vector<tensorflow::int64> InputIndexer::ComputeCumulativeDimensions() { + std::vector<tensorflow::int64> cumulative_dimensions(rank_); + int count = 0; + for (int i = 0; i < rank_; ++i) { + if (count == 0) { + cumulative_dimensions[count] = 1; + } else { + cumulative_dimensions[count] = + cumulative_dimensions[count - 1] * dimension_ceiling_[count - 1]; + } + ++count; + } + return cumulative_dimensions; +} +template <typename IndexVecT> +void process_desired_shape(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& input_tensor_shape, + const IndexVecT& 
desired_shape, + int* adjustable_dimension, + std::vector<tensorflow::int64>* target_dimensions, + tensorflow::int64* output_size) { + tensorflow::int64 new_sliced_size = 1; + bool found = false; + const int rank = input_tensor_shape.dims(); for (int i = 0; i < rank; ++i) { - // if (desired_shape(i) < 1) { if (desired_shape[i] < 1) { // only one index can be adjustable OP_REQUIRES(context, !found, tensorflow::errors::InvalidArgument( "periodic_resample expects only " "one index to be marked as adjustable.")); - adjustable_dimension = i; + *adjustable_dimension = i; found = true; } else { OP_REQUIRES( @@ -122,9 +232,8 @@ template <class InputDataT, i, " input tensor has size ", input_tensor_shape.dim_size(i), ", desired shape has size ", desired_shape[i], ".")); - // target_dimensions[i] = desired_shape(i); - target_dimensions[i] = desired_shape[i]; - new_sliced_size *= target_dimensions[i]; + (*target_dimensions)[i] = desired_shape[i]; + new_sliced_size *= (*target_dimensions)[i]; } } // at least one index needs to be adjustable @@ -132,26 +241,50 @@ template <class InputDataT, tensorflow::errors::InvalidArgument( "periodic_resample expects at least " "one index to be marked as adjustable.")); + (*target_dimensions)[*adjustable_dimension] = + input_tensor_shape.num_elements() / new_sliced_size; - int count = 0; - for (const auto dim_info : input_tensor.shape()) { - original_dimensions[count] = dim_info.size; - ++count; - } + *output_size = new_sliced_size * (*target_dimensions)[*adjustable_dimension]; +} - target_dimensions[adjustable_dimension] = total_size / new_sliced_size; +// Heuristic number based on measurements on +// Intel(R) Core(TM) i7-4930K CPU @ 3.40GHz +const tensorflow::int64 costPerFillIndex = 35; - count = 0; - for (int i = 0; i < input_tensor.shape().dims(); ++i) { - dimension_ceiling[count] = tensorflow::int64(std::ceil( - float(target_dimensions[count]) / float(original_dimensions[count]))); - if (count == 0) - cumulative_dimensions[count] = 1; 
- else - cumulative_dimensions[count] = - cumulative_dimensions[count - 1] * dimension_ceiling[count - 1]; - ++count; - } +enum class Mode { + kForward, + kGradient +}; + +// Computes either periodic_resample operation output or gradients for it, +// depending on |mode|. +// |original_shape| is always shape of input to periodic_resample operation. +// |source_tensor| is either source for periodic_resample (for forward mode) +// or gradients tensor. +// |desired_shape| is always shape, provided by user, to which forward +// propagation attempts resample input tensor. +template <class InputDataT, Mode mode> +void +do_periodic_resample_op(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape, + const tensorflow::Tensor& source_tensor) { + const int rank = source_tensor.dims(); + + // requires that the rank of the input tensor and length of the desired shape + // are equal + OP_REQUIRES(context, rank == desired_shape.dims(), + tensorflow::errors::InvalidArgument( + "periodic_resample expects the rank of the input tensor, ", + rank, ", to be the same as the length of the desired shape, ", + desired_shape.dims(), ".")); + + std::vector<tensorflow::int64> target_dimensions(rank); + tensorflow::int64 new_size = 0; + // index of adjustable dimension + int adjustable_dimension = 0; + process_desired_shape(context, original_shape, desired_shape.dim_sizes(), + &adjustable_dimension, &target_dimensions, &new_size); // ensure that the new dimension is greater than zero OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0, @@ -160,11 +293,14 @@ template <class InputDataT, "adjustable dimension, ", adjustable_dimension, ", isn't greater than zero, ", target_dimensions[adjustable_dimension], ".")); - for (int i = 0; i < rank; ++i) { - output_shape.AddDim(target_dimensions[i]); + tensorflow::TensorShape output_shape; + if (mode == Mode::kForward) { + for (int i = 0; i < rank; ++i) { + 
output_shape.AddDim(target_dimensions[i]); + } + } else { + output_shape = original_shape; } - const auto new_size = - new_sliced_size * target_dimensions[adjustable_dimension]; // Create an output tensor and attach it to the current context tensorflow::Tensor* output_tensor = nullptr; @@ -172,47 +308,73 @@ template <class InputDataT, context->allocate_output(0, output_shape, &output_tensor)); auto output = output_tensor->flat<InputDataT>(); - // memory is allocated for these variables outside the inner loop for - // efficiency (although, I could create a separate class scope for - // this purpose instead) - tensorflow::int64 result = 0; - std::vector<tensorflow::int64> output_indices(target_dimensions.size()); + // input is a strided array (last index is fastest, C-ordered) + auto input = source_tensor.flat<InputDataT>(); // Fill output tensor with periodically resampled input tensor values - for (tensorflow::int64 output_index = 0; output_index < new_size; - ++output_index) { - output(output_index) = input(compute_input_index( - &target_dimensions, output_index, original_dimensions, - adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result, - &output_indices, rank)); - } + InputIndexer input_indexer(target_dimensions, original_shape, + adjustable_dimension); + + auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); + auto fill_output_tensor = [&input_indexer, &output, &input]( + tensorflow::int64 start, tensorflow::int64 limit) { + InputIndexer local_indexer(input_indexer); + local_indexer.MoveToOutputIndex(start); + for (tensorflow::int64 output_index = start; output_index < limit; + ++output_index) { + if (mode == Mode::kForward) { + output(output_index) = input(local_indexer.linear_input_index()); + } else { + output(local_indexer.linear_input_index()) = input(output_index); + } + local_indexer.IncrementOutputIndex(); + } + }; + ::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers, + new_size, 
costPerFillIndex, fill_output_tensor); } +#define DATA_TYPE_SWITCH(data_type, context, CASE) \ + switch (data_type) { \ + CASE(float) \ + CASE(double) \ + CASE(tensorflow::int32) \ + CASE(tensorflow::int64) \ + default: \ + context->CtxFailure(__FILE__, __LINE__, \ + tensorflow::errors::InvalidArgument( \ + "Unsuppored tensor elements type")); \ + break; \ + } + void create_output_tensor( tensorflow::OpKernelContext* context, const tensorflow::Tensor& input_tensor, const tensorflow::DataType& input_tensor_type, - const tensorflow::PartialTensorShape& desired_shape_tensor) { - auto desired_shape = desired_shape_tensor.dim_sizes(); - - // obligatory type switch - switch (input_tensor_type) { - case tensorflow::DataTypeToEnum<float>::value: - fill_periodic_tensor<float>(context, desired_shape, input_tensor); + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum<type>::value: \ + do_periodic_resample_op<type, Mode::kForward>( \ + context, input_tensor.shape(), desired_shape, input_tensor); \ break; - case tensorflow::DataTypeToEnum<double>::value: - fill_periodic_tensor<double>(context, desired_shape, input_tensor); - break; - case tensorflow::DataTypeToEnum<tensorflow::int32>::value: - fill_periodic_tensor<tensorflow::int32>(context, desired_shape, - input_tensor); - break; - case tensorflow::DataTypeToEnum<tensorflow::int64>::value: - fill_periodic_tensor<tensorflow::int64>(context, desired_shape, - input_tensor); + + DATA_TYPE_SWITCH(input_tensor_type, context, CASE); +#undef CASE +} + +void create_grad_tensor(tensorflow::OpKernelContext* context, + const tensorflow::Tensor& grad_tensor, + const tensorflow::DataType& grad_tensor_type, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum<type>::value: \ + do_periodic_resample_op<type, Mode::kGradient>( \ + context, original_shape, desired_shape, 
grad_tensor); \ break; - default:; - } + + DATA_TYPE_SWITCH(grad_tensor_type, context, CASE); +#undef CASE } } // namespace @@ -238,4 +400,25 @@ class PeriodicResampleOp : public tensorflow::OpKernel { tensorflow::PartialTensorShape desired_shape; }; +class PeriodicResampleOpGrad : public tensorflow::OpKernel { + public: + explicit PeriodicResampleOpGrad(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) { + OP_REQUIRES_OK(context, + context->GetAttr("original_shape", &original_shape)); + OP_REQUIRES_OK(context, context->GetAttr("desired_shape", &desired_shape)); + } + + void Compute(tensorflow::OpKernelContext* context) override { + const tensorflow::Tensor& grad_tensor = context->input(0); + const tensorflow::DataType grad_tensor_type = context->input_dtype(0); + create_grad_tensor(context, grad_tensor, grad_tensor_type, original_shape, + desired_shape); + } + + private: + tensorflow::TensorShape original_shape; + tensorflow::PartialTensorShape desired_shape; +}; + #endif // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_ diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc index 82bd796956..fd38cd09b4 100644 --- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc +++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc @@ -26,7 +26,42 @@ REGISTER_OP("PeriodicResample") .Input("values: T") .Attr("shape: shape") .Output("output: T") - .SetShapeFn(shape_inference::ExplicitShape) + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::PartialTensorShape desired_shape; + TF_RETURN_IF_ERROR(c->GetAttr("shape", &desired_shape)); + shape_inference::ShapeHandle input_tensor_shape = c->input(0); + shape_inference::DimensionHandle num_input_elements = + c->NumElements(input_tensor_shape); + shape_inference::ShapeHandle result_shape_handle; + if (!shape_inference::InferenceContext::ValueKnown(num_input_elements)) { + 
TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + desired_shape, &result_shape_handle)); + } else { + const int rank = c->Rank(input_tensor_shape); + std::vector<tensorflow::int64> target_dimensions(rank); + tensorflow::int64 new_sliced_size = 1; + int adjustable_dimension = 0; + for (int i = 0; i < rank; ++i) { + if (desired_shape.dim_size(i) < 1) { + adjustable_dimension = i; + } else { + target_dimensions[i] = desired_shape.dim_size(i); + new_sliced_size *= target_dimensions[i]; + } + } + target_dimensions[adjustable_dimension] = + shape_inference::InferenceContext::Value( + num_input_elements) / new_sliced_size; + tensorflow::TensorShape result_shape; + for (int i = 0; i < rank; ++i) { + result_shape.AddDim(target_dimensions[i]); + } + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape( + result_shape, &result_shape_handle)); + } + c->set_output(0, result_shape_handle); + return Status::OK(); + }) .Doc(R"doc( Periodically resample elements of a tensor to conform to `shape`. @@ -101,4 +136,20 @@ output: Periodically resampled tensor that has dimensions specified as in )doc"); + +REGISTER_OP("PeriodicResampleOpGrad") + .Attr("T: numbertype") + .Input("grad: T") + .Attr("original_shape: shape") + .Attr("desired_shape: shape") + .Output("grad_values: T") + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::TensorShape original_shape; + TF_RETURN_IF_ERROR(c->GetAttr("original_shape", &original_shape)); + shape_inference::ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(original_shape, &s)); + c->set_output(0, s); + return Status::OK(); +}); + } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc new file mode 100644 index 0000000000..43b7c1799f --- /dev/null +++ b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc @@ -0,0 +1,41 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +TEST(ArrayOpsTest, PeriodicResample_ShapeFn) { + ShapeInferenceTestOp op("PeriodicResample"); + // Case 1: output shape can be fully inferreed. + PartialTensorShape shape({4, 4, -1}); + TensorShapeProto shape_proto; + shape.AsProto(&shape_proto); + + TF_ASSERT_OK(NodeDefBuilder("test", "PeriodicResample") + .Input({"values", 0, DT_INT32}) + .Attr("shape", shape_proto) + .Finalize(&op.node_def)); + INFER_OK(op, "[2,2,4]", "[4,4,1]"); + // Case 2: output shape can not be inferred - report desired shape. 
+ INFER_OK(op, "[2,2,?]", "[4,4,?]"); +} + +} // end namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py index a25de55e18..31a6fe1d94 100644 --- a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py +++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py @@ -21,8 +21,11 @@ from __future__ import print_function import numpy from tensorflow.contrib.periodic_resample import periodic_resample +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -93,7 +96,6 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): def testPeriodicResampleErrors(self): input_tensor = numpy.zeros(shape=[1, 2, 2, 4]) with self.test_session(): - variables.global_variables_initializer().run() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, 'Dimension 3 input tensor has size 4, desired shape has size 1'): @@ -103,6 +105,29 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): '4, to be the same as the length of the desired shape, 3'): periodic_resample(input_tensor, [None, 4, 4]).eval() + def testPeriodicResampleGradient(self): + desired_shape = numpy.array([4, 4, None]) + result_shape = (4, 4, 1) + input_shape = (2, 2, 4) + with self.test_session() as sess: + x = array_ops.placeholder(dtypes.float32, shape=input_shape) + output = periodic_resample(x, desired_shape) + error = gradient_checker.compute_gradient_error( + x, input_shape, output, result_shape) + self.assertLess(error, 1e-4) + + def testPeriodicResampleShapeInference(self): + with 
self.test_session() as sess: + # Case 1: output shape can be fully inferreed. + x = array_ops.placeholder(dtypes.float32, shape=(2, 2, 4)) + output = periodic_resample(x, [4, 4, None]) + self.assertEqual(output.shape, [4, 4, 1]) + # Case 2: output shape can not be inferred - report desired shape. + x = array_ops.placeholder(dtypes.float32, shape=(2, 2, None)) + output = periodic_resample(x, [4, 4, None]) + self.assertTrue(output.shape.is_compatible_with([4, 4, None])) + self.assertEqual(output.shape[2].value, None) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py index 348623d8f8..470e300ccb 100644 --- a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py +++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py @@ -21,11 +21,17 @@ from __future__ import print_function # pylint: disable=unused-import from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op -from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample +from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample, periodic_resample_op_grad from tensorflow.contrib.util import loader +from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader # pylint: enable=unused-import _periodic_resample_op = loader.load_op_library( resource_loader.get_path_to_datafile('_periodic_resample_op.so')) + +@ops.RegisterGradient("PeriodicResample") +def _periodic_resample_grad_cc(op, grad): + return periodic_resample_op_grad( + grad, op.inputs[0].shape, op.get_attr('shape')) diff --git a/tensorflow/contrib/predictor/contrib_estimator_predictor.py b/tensorflow/contrib/predictor/contrib_estimator_predictor.py index b7a98c68e2..af3b2ad1b5 100644 --- 
a/tensorflow/contrib/predictor/contrib_estimator_predictor.py +++ b/tensorflow/contrib/predictor/contrib_estimator_predictor.py @@ -34,7 +34,8 @@ class ContribEstimatorPredictor(predictor.Predictor): prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None): + graph=None, + config=None): """Initialize a `ContribEstimatorPredictor`. Args: @@ -48,6 +49,7 @@ class ContribEstimatorPredictor(predictor.Predictor): multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. """ self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -58,6 +60,7 @@ class ContribEstimatorPredictor(predictor.Predictor): checkpoint_path = saver.latest_checkpoint(estimator.model_dir) self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( + config=config, checkpoint_filename_with_path=checkpoint_path)) input_alternative_key = ( diff --git a/tensorflow/contrib/predictor/core_estimator_predictor.py b/tensorflow/contrib/predictor/core_estimator_predictor.py index d78d94c269..a725072e72 100644 --- a/tensorflow/contrib/predictor/core_estimator_predictor.py +++ b/tensorflow/contrib/predictor/core_estimator_predictor.py @@ -51,7 +51,8 @@ class CoreEstimatorPredictor(predictor.Predictor): estimator, serving_input_receiver_fn, output_key=None, - graph=None): + graph=None, + config=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -62,6 +63,7 @@ class CoreEstimatorPredictor(predictor.Predictor): `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. 
""" self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -71,6 +73,7 @@ class CoreEstimatorPredictor(predictor.Predictor): checkpoint_dir = estimator.model_dir self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( + config=config, checkpoint_dir=checkpoint_dir)) feed_tensor_info = signature_def.inputs diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 6e77e934fe..f275bc15ad 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -30,7 +30,8 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `tf.contrib.learn.Estimator`. Args: @@ -44,6 +45,7 @@ def from_contrib_estimator(estimator, multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -62,13 +64,15 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=input_alternative_key, output_alternative_key=output_alternative_key, - graph=graph) + graph=graph, + config=config) def from_estimator(estimator, serving_input_receiver_fn, output_key=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `tf.python.estimator.Estimator`. Args: @@ -79,6 +83,7 @@ def from_estimator(estimator, `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -93,14 +98,19 @@ def from_estimator(estimator, 'tf.contrib.learn.Estimator. 
You likely want to call ' 'from_contrib_estimator.') return core_estimator_predictor.CoreEstimatorPredictor( - estimator, serving_input_receiver_fn, output_key=output_key, graph=graph) + estimator, + serving_input_receiver_fn, + output_key=output_key, + graph=graph, + config=config) def from_saved_model(export_dir, signature_def_key=None, signature_def=None, tags=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `SavedModel` on disk. Args: @@ -115,6 +125,7 @@ def from_saved_model(export_dir, `SignatureDef`. Defaults to `DEFAULT_TAGS`. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -128,4 +139,5 @@ def from_saved_model(export_dir, signature_def_key=signature_def_key, signature_def=signature_def, tags=tags, - graph=graph) + graph=graph, + config=config) diff --git a/tensorflow/contrib/predictor/predictor_factories_test.py b/tensorflow/contrib/predictor/predictor_factories_test.py index 578d9424b2..a2ef1dc3af 100644 --- a/tensorflow/contrib/predictor/predictor_factories_test.py +++ b/tensorflow/contrib/predictor/predictor_factories_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.contrib.predictor import predictor_factories from tensorflow.contrib.predictor import testing_common +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import test MODEL_DIR_NAME = 'contrib/predictor/test_export_dir' @@ -41,6 +42,11 @@ class PredictorFactoriesTest(test.TestCase): """Test loading from_saved_model with tags.""" predictor_factories.from_saved_model(self._export_dir, tags='serve') + def testFromSavedModelWithSessionConfig(self): + """Test loading from_saved_model with session config.""" + predictor_factories.from_saved_model( + self._export_dir, config=config_pb2.ConfigProto()) + def testFromSavedModelWithBadTags(self): """Test that loading fails for bad 
tags.""" bad_tags_regex = ('.*? could not be found in SavedModel') @@ -53,6 +59,13 @@ class PredictorFactoriesTest(test.TestCase): predictor_factories.from_contrib_estimator( estimator, input_fn, output_alternative_key='sum') + def testFromContribEstimatorWithSessionConfig(self): + estimator = testing_common.get_arithmetic_estimator(core=False) + input_fn = testing_common.get_arithmetic_input_fn(core=False) + predictor_factories.from_contrib_estimator( + estimator, input_fn, output_alternative_key='sum', + config=config_pb2.ConfigProto()) + def testFromContribEstimatorWithCoreEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=True) input_fn = testing_common.get_arithmetic_input_fn(core=True) @@ -64,6 +77,12 @@ class PredictorFactoriesTest(test.TestCase): input_fn = testing_common.get_arithmetic_input_fn(core=True) predictor_factories.from_estimator(estimator, input_fn) + def testFromCoreEstimatorWithSessionConfig(self): + estimator = testing_common.get_arithmetic_estimator(core=True) + input_fn = testing_common.get_arithmetic_input_fn(core=True) + predictor_factories.from_estimator( + estimator, input_fn, config=config_pb2.ConfigProto()) + def testFromCoreEstimatorWithContribEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=False) input_fn = testing_common.get_arithmetic_input_fn(core=False) diff --git a/tensorflow/contrib/predictor/saved_model_predictor.py b/tensorflow/contrib/predictor/saved_model_predictor.py index 0dbca0f813..95da6d04ed 100644 --- a/tensorflow/contrib/predictor/saved_model_predictor.py +++ b/tensorflow/contrib/predictor/saved_model_predictor.py @@ -121,7 +121,8 @@ class SavedModelPredictor(predictor.Predictor): input_names=None, output_names=None, tags=None, - graph=None): + graph=None, + config=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -142,6 +143,7 @@ class SavedModelPredictor(predictor.Predictor): the correct `SignatureDef`. Defaults to `DEFAULT_TAGS`. 
graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Raises: ValueError: If more than one of signature_def_key OR signature_def OR (input_names AND output_names) is specified. @@ -152,7 +154,7 @@ class SavedModelPredictor(predictor.Predictor): self._graph = graph or ops.Graph() with self._graph.as_default(): - self._session = session.Session() + self._session = session.Session(config=config) loader.load(self._session, tags.split(','), export_dir) if input_names is None: diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index c83623ec94..27a933c0f9 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -6,7 +6,7 @@ inference. The details of the transformation implemented in this package is described here [1]. This is done using the -[fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). +[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization). Literature has shown that fixed point networks provide comparable performance to floating point networks [2]. 
This is achieved by modeling the quantization diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index 94fc12ca81..3d0308aaf3 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -26,7 +26,6 @@ import time import numpy as np from tensorflow.contrib.framework.python.ops import variables as variables_lib -from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib from tensorflow.core.protobuf import saver_pb2 @@ -37,6 +36,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics from tensorflow.python.ops import variables from tensorflow.python.platform import flags from tensorflow.python.platform import gfile @@ -89,8 +89,8 @@ class EvaluationTest(test.TestCase): self._predictions, self._scale = TestModel(self._inputs) def testFinalOpsOnEvaluationLoop(self): - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) # Create checkpoint and log directories: @@ -136,9 +136,10 @@ class EvaluationTest(test.TestCase): self.assertTrue(obj.hook_was_run) def _create_names_to_metrics(self, predictions, labels): - accuracy0, update_op0 = metric_ops.streaming_accuracy(predictions, labels) - accuracy1, update_op1 = metric_ops.streaming_accuracy(predictions + 1, - labels) + accuracy0, update_op0 = metrics.accuracy( + labels=labels, predictions=predictions) + accuracy1, 
update_op1 = metrics.accuracy( + labels=labels, predictions=predictions + 1) names_to_values = {'Accuracy': accuracy0, 'Another_accuracy': accuracy1} names_to_updates = {'Accuracy': update_op0, 'Another_accuracy': update_op1} @@ -198,8 +199,8 @@ class EvaluationTest(test.TestCase): predictions_limited = input.limit_epochs(self._predictions, num_epochs=1) labels_limited = input.limit_epochs(self._labels, num_epochs=1) - value_op, update_op = metric_ops.streaming_accuracy( - predictions_limited, labels_limited) + value_op, update_op = metrics.accuracy( + labels=labels_limited, predictions=predictions_limited) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) @@ -260,8 +261,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) # Run the evaluation and verify the results: accuracy_value = evaluation.evaluate_once( @@ -276,8 +277,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir') dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index 99ced53e11..d22b80ac88 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -21,6 +21,7 @@ from @{tf.summary.merge_all} to @{tf.summary.FileWriter}. 
To use with eager execution enabled, write your code as follows: +```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -30,9 +31,11 @@ with summary_writer.as_default(), tf.contrib.summary.always_record_summaries(): tf.contrib.summary.scalar("loss", my_loss) # In this case every call to tf.contrib.summary.scalar will generate a record # ... +``` To use it with graph execution, write your code as follows: +```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -53,7 +56,7 @@ with tf.Session(...) as sess: while not_done_training: sess.run([train_op, tf.contrib.summary.all_summary_ops()]) # ... - +``` """ from __future__ import absolute_import diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py index e893e1d1c8..d8236a0a6f 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py @@ -21,10 +21,10 @@ import numpy as np from tensorflow.contrib import losses from tensorflow.contrib.learn.python.learn.estimators import prediction_key -from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics from tensorflow.python.ops import nn INFERENCE_PROB_NAME = prediction_key.PredictionKey.PROBABILITIES @@ -38,12 +38,13 @@ def _top_k_generator(k): targets = math_ops.to_int32(targets) if targets.get_shape().ndims > 1: targets = array_ops.squeeze(targets, axis=[1]) - return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k)) + return metrics.mean(nn.in_top_k(probabilities, targets, k)) return _top_k def _accuracy(predictions, targets, weights=None): - return metric_ops.streaming_accuracy(predictions, 
targets, weights=weights) + return metrics.accuracy( + labels=targets, predictions=predictions, weights=weights) def _r2(probabilities, targets, weights=None): @@ -53,7 +54,7 @@ def _r2(probabilities, targets, weights=None): squares_residuals = math_ops.reduce_sum( math_ops.square(targets - probabilities), 0) score = 1 - math_ops.reduce_sum(squares_residuals / squares_total) - return metric_ops.streaming_mean(score, weights=weights) + return metrics.mean(score, weights=weights) def _squeeze_and_onehot(targets, depth): @@ -62,7 +63,7 @@ def _squeeze_and_onehot(targets, depth): def _sigmoid_entropy(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.sigmoid_cross_entropy(probabilities, _squeeze_and_onehot( targets, @@ -71,7 +72,7 @@ def _sigmoid_entropy(probabilities, targets, weights=None): def _softmax_entropy(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.sparse_softmax_cross_entropy(probabilities, math_ops.to_int32(targets)), weights=weights) @@ -82,7 +83,7 @@ def _predictions(predictions, unused_targets, **unused_kwargs): def _class_log_loss(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.log_loss(probabilities, _squeeze_and_onehot(targets, array_ops.shape(probabilities)[1])), @@ -90,34 +91,36 @@ def _class_log_loss(probabilities, targets, weights=None): def _precision(predictions, targets, weights=None): - return metric_ops.streaming_precision(predictions, targets, weights=weights) + return metrics.precision( + labels=targets, predictions=predictions, weights=weights) def _precision_at_thresholds(predictions, targets, weights=None): - return metric_ops.streaming_precision_at_thresholds( - array_ops.slice(predictions, [0, 1], [-1, 1]), - targets, - np.arange( - 0, 1, 0.01, dtype=np.float32), + return metrics.precision_at_thresholds( + labels=targets, + predictions=array_ops.slice(predictions, [0, 
1], [-1, 1]), + thresholds=np.arange(0, 1, 0.01, dtype=np.float32), weights=weights) def _recall(predictions, targets, weights=None): - return metric_ops.streaming_recall(predictions, targets, weights=weights) + return metrics.recall( + labels=targets, predictions=predictions, weights=weights) def _recall_at_thresholds(predictions, targets, weights=None): - return metric_ops.streaming_recall_at_thresholds( - array_ops.slice(predictions, [0, 1], [-1, 1]), - targets, - np.arange( - 0, 1, 0.01, dtype=np.float32), + return metrics.recall_at_thresholds( + labels=targets, + predictions=array_ops.slice(predictions, [0, 1], [-1, 1]), + thresholds=np.arange(0, 1, 0.01, dtype=np.float32), weights=weights) def _auc(probs, targets, weights=None): - return metric_ops.streaming_auc(array_ops.slice(probs, [0, 1], [-1, 1]), - targets, weights=weights) + return metrics.auc( + labels=targets, + predictions=array_ops.slice(probs, [0, 1], [-1, 1]), + weights=weights) _EVAL_METRICS = { diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index 7a35a70bbe..6f62cd11a9 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -295,7 +295,7 @@ def get_epoch_variable(): # A simple container to hold the training variables for a single tree. -class TreeTrainingVariables(object): +class TreeVariables(object): """Stores tf.Variables for training a single random tree. Uses tf.get_variable to get tree-specific names so that this can be used @@ -303,7 +303,7 @@ class TreeTrainingVariables(object): then relies on restoring that model to evaluate). 
""" - def __init__(self, params, tree_num, training): + def __init__(self, params, tree_num, training, tree_config='', tree_stat=''): if (not hasattr(params, 'params_proto') or not isinstance(params.params_proto, _params_proto.TensorForestParams)): @@ -315,27 +315,28 @@ class TreeTrainingVariables(object): # TODO(gilberth): Manually shard this to be able to fit it on # multiple machines. self.stats = stats_ops.fertile_stats_variable( - params, '', self.get_tree_name('stats', tree_num)) + params, tree_stat, self.get_tree_name('stats', tree_num)) self.tree = model_ops.tree_variable( - params, '', self.stats, self.get_tree_name('tree', tree_num)) + params, tree_config, self.stats, self.get_tree_name('tree', tree_num)) def get_tree_name(self, name, num): return '{0}-{1}'.format(name, num) -class ForestTrainingVariables(object): +class ForestVariables(object): """A container for a forests training data, consisting of multiple trees. - Instantiates a TreeTrainingVariables object for each tree. We override the + Instantiates a TreeVariables object for each tree. We override the __getitem__ and __setitem__ function so that usage looks like this: - forest_variables = ForestTrainingVariables(params) + forest_variables = ForestVariables(params) ... forest_variables.tree ... """ def __init__(self, params, device_assigner, training=True, - tree_variables_class=TreeTrainingVariables): + tree_variables_class=TreeVariables, + tree_configs=None, tree_stats=None): self.variables = [] # Set up some scalar variables to run through the device assigner, then # we can use those to colocate everything related to a tree. 
@@ -347,7 +348,13 @@ class ForestTrainingVariables(object): for i in range(params.num_trees): with ops.device(self.device_dummies[i].device): - self.variables.append(tree_variables_class(params, i, training)) + kwargs = {} + if tree_configs is not None: + kwargs.update(dict(tree_config=tree_configs[i])) + if tree_stats is not None: + kwargs.update(dict(tree_stat=tree_stats[i])) + self.variables.append(tree_variables_class( + params, i, training, **kwargs)) def __setitem__(self, t, val): self.variables[t] = val @@ -361,9 +368,11 @@ class RandomForestGraphs(object): def __init__(self, params, + tree_configs=None, + tree_stats=None, device_assigner=None, variables=None, - tree_variables_class=TreeTrainingVariables, + tree_variables_class=TreeVariables, tree_graphs=None, training=True): self.params = params @@ -371,9 +380,10 @@ class RandomForestGraphs(object): device_assigner or framework_variables.VariableDeviceChooser()) logging.info('Constructing forest with params = ') logging.info(self.params.__dict__) - self.variables = variables or ForestTrainingVariables( + self.variables = variables or ForestVariables( self.params, device_assigner=self.device_assigner, training=training, - tree_variables_class=tree_variables_class) + tree_variables_class=tree_variables_class, + tree_configs=tree_configs, tree_stats=tree_stats) tree_graph_class = tree_graphs or RandomTreeGraphs self.trees = [ tree_graph_class(self.variables[i], self.params, i) diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py index bbe627b157..1c9c81827e 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py @@ -18,10 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from google.protobuf.json_format import ParseDict +from tensorflow.contrib.decision_trees.proto import 
generic_tree_model_pb2 as _tree_proto from tensorflow.contrib.tensor_forest.python import tensor_forest from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.ops import resources +from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -110,6 +114,47 @@ class TensorForestTest(test_util.TensorFlowTestCase): self.assertTrue(isinstance(paths, ops.Tensor)) self.assertTrue(isinstance(var, ops.Tensor)) + def testInfrenceFromRestoredModel(self): + input_data = [[-1., 0.], [-1., 2.], # node 1 + [1., 0.], [1., -2.]] # node 2 + expected_prediction = [[0.0, 1.0], [0.0, 1.0], + [0.0, 1.0], [0.0, 1.0]] + hparams = tensor_forest.ForestHParams( + num_classes=2, + num_features=2, + num_trees=1, + max_nodes=1000, + split_after_samples=25).fill() + tree_weight = {'decisionTree': + {'nodes': + [{'binaryNode': + {'rightChildId': 2, + 'leftChildId': 1, + 'inequalityLeftChildTest': + {'featureId': {'id': '0'}, + 'threshold': {'floatValue': 0}}}}, + {'leaf': {'vector': + {'value': [{'floatValue': 0.0}, + {'floatValue': 1.0}]}}, + 'nodeId': 1}, + {'leaf': {'vector': + {'value': [{'floatValue': 0.0}, + {'floatValue': 1.0}]}}, + 'nodeId': 2}]}} + restored_tree_param = ParseDict(tree_weight, + _tree_proto.Model()).SerializeToString() + graph_builder = tensor_forest.RandomForestGraphs(hparams, + [restored_tree_param]) + probs, paths, var = graph_builder.inference_graph(input_data) + self.assertTrue(isinstance(probs, ops.Tensor)) + self.assertTrue(isinstance(paths, ops.Tensor)) + self.assertTrue(isinstance(var, ops.Tensor)) + with self.test_session(): + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + self.assertEquals(probs.eval().shape, (4, 2)) + self.assertEquals(probs.eval().tolist(), expected_prediction) + def testTrainingConstructionClassificationSparse(self): 
input_data = sparse_tensor.SparseTensor( indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]], diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b7b26cfb1c..da4dd5a14c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -91,8 +91,11 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource() && !edge->IsControlEdge()) { incoming_edges->insert(edge); + VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() + << " Y, "; } else { - VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, "; + VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() + << " N, "; } } } @@ -106,10 +109,12 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink() && !edge->IsControlEdge()) { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, "; + VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() + << " Y, "; outgoing_edges->insert(edge); } else { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, "; + VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() + << " N, "; } } } @@ -181,29 +186,27 @@ struct ConvertGraphParams { static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_incoming_edges); + + std::set<std::pair<int, int>> unique_tensors; + // Add only unique input source nodes. 
If output of an outside node is shared + // between multiple nodes inside the engine, only one edge should be created for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); - } - auto output_name_to_index_map = BuildTensorNameMap(p->output_names); - std::set<std::pair<int, int>> subgraph_outputs_set; - // Collect outputs referenced from output_names - for (int node_id : p->subgraph_node_ids) { - tensorflow::Node* node = p->graph.FindNodeId(node_id); - if (output_name_to_index_map.count(node->name())) { - for (int index : output_name_to_index_map.at(node->name())) { - subgraph_outputs_set.insert({node_id, index}); - } - } + unique_tensors.insert({edge->src()->id(), edge->src_output()}); } + p->subgraph_inputs.insert(p->subgraph_inputs.begin(), unique_tensors.begin(), + unique_tensors.end()); GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_outgoing_edges); + unique_tensors.clear(); + // Similar to above, if multiple ouside nodes are sharing the output of an + // internal node only one output port should be created and shared between + // outputs for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { - subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); + unique_tensors.insert({edge->src()->id(), edge->src_output()}); } - p->subgraph_outputs.reserve(subgraph_outputs_set.size()); + p->subgraph_outputs.reserve(unique_tensors.size()); p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - subgraph_outputs_set.begin(), - subgraph_outputs_set.end()); + unique_tensors.begin(), unique_tensors.end()); return tensorflow::Status::OK(); } @@ -225,7 +228,6 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { for (auto in_edge : params->subgraph_incoming_edges) { // loop over incoming edges and // attach them to calib node - // tensorflow::Node* src_node = in_edge->src(); auto src_output = in_edge->src_output(); auto dst_node = 
in_edge->dst(); auto dst_input = in_edge->dst_input(); @@ -257,19 +259,24 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); } + std::set<std::pair<int, int>> unique_tensors; for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()}; + if (unique_tensors.count(old_src)) continue; + unique_tensors.insert(old_src); int new_src_output = subgraph_edge_to_input_map.at(old_src); params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, new_src_output); + VLOG(1) << "Wire " << edge->src()->name() << ":" << edge->src_output() + << " -> " << trt_node->name() << ":" << new_src_output; params->graph.RemoveEdge(edge); } - - VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + if (VLOG_IS_ON(2)) { + VLOG(2) << "new edge count: " << trt_node->in_edges().size(); + for (const tensorflow::Edge* edge : trt_node->in_edges()) { + VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + } } - TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph @@ -283,6 +290,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { int new_src_output = subgraph_edge_to_output_map.at(old_src); TF_RETURN_IF_ERROR(params->graph.UpdateEdge( trt_node, new_src_output, edge->dst(), edge->dst_input())); + VLOG(1) << "Wire " << trt_node->name() << ":" << new_src_output << " -> " + << edge->dst()->name() << ":" << edge->dst_input(); } // Remove the original subgraph for (int node_id : params->subgraph_node_ids) { @@ -317,9 +326,12 @@ tensorflow::Status ConvertCalibGraphToInferGraph( tensorflow::GraphConstructorOptions(), graph_def, 
&graph)); // get calib nodes std::vector<tensorflow::Node*> calib_nodes; - for (auto node : graph.op_nodes()) { + std::vector<tensorflow::Node*> topo_order; + tensorflow::GetPostOrder(graph, &topo_order); + for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) { + auto node = *rit; if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node"; + VLOG(1) << "Found Calib Node " << node->name(); calib_nodes.push_back(node); } } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 96e0700862..4e4d295538 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -362,10 +362,11 @@ void ReorderCKtoKC(const TRT_ShapedWeights& iweights, break; } case tensorflow::DataType::DT_HALF: { - Reorder2({k, c}, static_cast<Eigen::half const*>(iweights.GetValues()), - istrides, static_cast<Eigen::half*>( - const_cast<void*>(oweights->GetValues())), - ostrides); + Reorder2( + {k, c}, static_cast<Eigen::half const*>(iweights.GetValues()), + istrides, + static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())), + ostrides); break; } default: @@ -1179,9 +1180,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); + return tensorflow::errors::Unimplemented( + "binary op: " + node_def.op() + + " not supported at: " + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast<nvinfer1::ITensor*>(tensor_l), @@ -2138,9 +2139,7 @@ void Converter::register_op_converters() { } } // namespace -tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { - return tensorflow::errors::Unimplemented("Not implemented yet"); -} + tensorflow::Status 
ConvertCalibrationNodeToEngineNode( tensorflow::Graph& graph, tensorflow::Node* c_node) { const auto ndef = c_node->def(); @@ -2164,9 +2163,23 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( for (auto n : graph.op_nodes()) { node_maps.insert({n->name(), n}); } + std::set<int> subgraph_ids; + for (const auto internal_node : segment_nodes) { + subgraph_ids.insert(node_maps.at(internal_node)->id()); + } + if (VLOG_IS_ON(2)) { + string node_names = StrCat(c_node->name(), " segment nodes= "); + + for (const auto& node_name : segment_nodes) { + StrAppend(&node_names, node_name, ", "); + } + VLOG(2) << node_names; + } + VLOG(1) << "Output Nodes:"; std::vector<tensorflow::DataType> out_types; std::vector<const tensorflow::Edge*> out_edges; + for (auto& i : output_nodes) { auto node_port = tensorflow::str_util::Split(i, ":"); VLOG(1) << " " << i << " in graph " << node_maps.count(i); @@ -2186,18 +2199,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( out_types.push_back(out_node->output_type(0)); } for (auto out_edge : out_node->out_edges()) { + if (subgraph_ids.count(out_edge->dst()->id())) + continue; // skip internal edges; if (out_edge->src_output() == port) { out_edges.push_back(out_edge); - break; + VLOG(1) << "OUTPUT EDGE " << out_edge->src()->name() << ":" + << out_edge->src_output() << " -> " << out_edge->dst()->name() + << ":" << out_edge->dst_input(); } } } else { LOG(WARNING) << " couldn't find output node " << out_node_name; } } - VLOG(1) << "Input Nodes:"; - for (auto& i : input_names) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); + if (VLOG_IS_ON(1)) { + VLOG(1) << c_node->name() << " Input Nodes:"; + for (auto& i : input_names) { + VLOG(1) << " Input " << i << " in graph " << node_maps.count(i); + } } auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); auto resmgr = trt_rm->getManager("TRTCalibOps"); @@ -2231,14 +2250,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->builder_ = 
nullptr; tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges; + income_edges.resize(c_node->num_inputs()); for (const auto in_edge : c_node->in_edges()) { auto src = in_edge->src(); int dest_port = in_edge->dst_input(); - income_edges.emplace_back(src->name(), in_edge->src_output(), - c_node->input_type(dest_port)); + VLOG(1) << "Incoming connection " << src->name() << ":" + << in_edge->src_output() << " -> " << c_node->name() << ":" + << dest_port; + income_edges.at(dest_port) = {src->name(), in_edge->src_output(), + c_node->input_type(dest_port)}; } tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list( income_edges); + if (VLOG_IS_ON(2)) { + for (const auto& inp : input_list) { + VLOG(2) << " Input from inputlist " << inp.node << ":" << inp.index << " " + << tensorflow::DataTypeString(inp.data_type); + } + } op_builder.Input(input_list); tensorflow::NodeDef engine_node; const char* engine_plan_data = static_cast<const char*>(engine_plan->data()); @@ -2255,13 +2284,26 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( } auto trt_engine_node = graph.AddNode(engine_node, &status); TF_RETURN_IF_ERROR(status); - for (size_t i = 0; i < out_edges.size(); i++) { - VLOG(1) << "Connecting trt_engine_node output " << i << " with " - << out_edges.at(i)->dst()->name() << " port " - << out_edges.at(i)->dst_input(); - TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, - out_edges.at(i)->dst(), - out_edges.at(i)->dst_input())); + std::map<string, int> port_map; + for (size_t t = 0; t < output_nodes.size(); t++) { + port_map.insert({output_nodes.at(t), t}); + } + for (auto& i : out_edges) { + string s(i->src()->name()); + if (i->src_output()) StrAppend(&s, ":", i->src_output()); + int out_port = port_map.at(s); + VLOG(1) << "Connecting " << trt_engine_node->name() << ":" << out_port + << " -> " << i->dst()->name() << ":" << i->dst_input(); + TF_RETURN_IF_ERROR( + 
graph.UpdateEdge(trt_engine_node, out_port, i->dst(), i->dst_input())); + } + for (const auto ed : trt_engine_node->in_edges()) { + VLOG(1) << "In Edge " << ed->src()->name() << ":" << ed->src_output() + << " -> " << ed->dst()->name() << ":" << ed->dst_input(); + } + for (const auto ed : trt_engine_node->out_edges()) { + VLOG(1) << "Out Edge " << ed->src()->name() << ":" << ed->src_output() + << " -> " << ed->dst()->name() << ":" << ed->dst_input(); } VLOG(1) << "Segment nodes:"; for (auto& i : segment_nodes) { @@ -2332,6 +2374,7 @@ tensorflow::Status ConvertSubgraph( std::vector<string>* output_names, std::vector<tensorflow::DataType>* output_dtypes, const string& engine_name) { + std::set<string> added_tensors; for (const std::pair<int, int>& input : s.input_inds) { VLOG(2) << "parsing input. Node id= " << input.first; int node_id = input.first; @@ -2374,7 +2417,6 @@ tensorflow::Status ConvertSubgraph( auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes->push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); auto type_status = ConvertDType(tf_dtype, &dtype); @@ -2410,8 +2452,10 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) { input_tensor_name = StrCat(node_name, ":", output_idx); } - + if (added_tensors.count(input_tensor_name)) continue; + added_tensors.insert(input_tensor_name); input_names->push_back(input_tensor_name); + input_dtypes->push_back(tf_dtype); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); @@ -2435,6 +2479,7 @@ tensorflow::Status ConvertSubgraph( // Gather output metadata int trt_engine_op_output_idx = 0; + added_tensors.clear(); for (const std::pair<int, int>& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; @@ -2451,6 +2496,8 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) tensorflow::strings::StrAppend(&tensor_name, 
":", output_idx); VLOG(2) << "Output tensor name: " << tensor_name; + if (added_tensors.count(tensor_name)) continue; + added_tensors.insert(tensor_name); output_names->push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 2e472a2805..d879170b68 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -166,11 +166,21 @@ def StreamingFilesDataset(files, return remote_iterator.get_next() def MapFn(unused_input): - return functional_ops.remote_call( + if isinstance(source_dataset.output_types, dtypes.DType): + output_types = [source_dataset.output_types] + elif isinstance(source_dataset.output_types, (list, tuple)): + output_types = source_dataset.output_types + else: + raise ValueError('source dataset has invalid output types') + remote_calls = functional_ops.remote_call( args=[source_handle], - Tout=[dtypes.string], + Tout=output_types, f=LoadingFunc, - target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)[0] + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + if len(remote_calls) == 1: + return remote_calls[0] + else: + return remote_calls with ops.device('/job:%s' % worker_job): output_dataset = dataset_ops.Dataset.range(2).repeat().map( diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index 918cf0ed8e..b58d05eac5 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -26,6 +26,8 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from 
tensorflow.python.lib.io import python_io from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -162,6 +164,30 @@ class DatasetsTest(test.TestCase): self.assertEqual(set(all_contents), set(retrieved_values)) + def testArbitraryReaderFuncFromDatasetGenerator(self): + + def my_generator(): + yield (1, [1] * 10) + + def gen_dataset(dummy): + return dataset_ops.Dataset.from_generator( + my_generator, (dtypes.int64, dtypes.int64), + (tensor_shape.TensorShape([]), tensor_shape.TensorShape([10]))) + + dataset = datasets.StreamingFilesDataset( + dataset_ops.Dataset.range(10), filetype=gen_dataset) + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = self._sess.run(get_next) + + self.assertIsInstance(retrieved_values, (list, tuple)) + self.assertEqual(len(retrieved_values), 2) + self.assertEqual(retrieved_values[0], 1) + self.assertItemsEqual(retrieved_values[1], [1] * 10) + def testUnexpectedFiletypeString(self): with self.assertRaises(ValueError): datasets.StreamingFilesDataset( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d89633199d..b1c224a345 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -699,7 +699,9 @@ cc_library( srcs = ["platform/stacktrace_handler.cc"], hdrs = ["platform/stacktrace_handler.h"], deps = [ + ":abi", ":lib_platform", + ":stacktrace", ], ) @@ -3089,6 +3091,8 @@ cc_library( # we now need at least "str_util". 
":lib", ":lib_platform", + ":stacktrace_handler", + ":test_lite", "//tensorflow/core/platform/default/build_config:test_lite_main", ], alwayslink = 1, @@ -3569,7 +3573,10 @@ tf_cc_tests_gpu( tf_cc_test_mkl( name = "mkl_runtime_tests", size = "small", - srcs = ["common_runtime/mkl_cpu_allocator_test.cc"], + srcs = [ + "common_runtime/mkl_cpu_allocator_test.cc", + "common_runtime/mkl_threadpool_device_test.cc", + ], linkstatic = 1, deps = [ ":core", diff --git a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt index cbe76de415..985f09312f 100644 --- a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt @@ -4,6 +4,10 @@ op { description: <<END if < 0, `scale * features` otherwise. +To be used together with +`initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`. +For correct dropout, use `tf.contrib.nn.alpha_dropout`. + See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) END } diff --git a/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt new file mode 100644 index 0000000000..6e13d0d049 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt @@ -0,0 +1,48 @@ +op { + graph_op_name: "StringSplitV2" + in_arg { + name: "input" + description: <<END +`1-D` string `Tensor`, the strings to split. +END + } + in_arg { + name: "sep" + description: <<END +`0-D` string `Tensor`, the delimiter character. +END + } + attr { + name: "maxsplit" + description: <<END +An `int`. If `maxsplit > 0`, limit of the split of the result. +END + } + summary: "Split elements of `source` based on `sep` into a `SparseTensor`." + description: <<END +Let N be the size of source (typically N will be the batch size). Split each +element of `source` based on `sep` and return a `SparseTensor` +containing the split tokens. Empty tokens are ignored. 
+ +For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', +then the output will be +``` +st.indices = [0, 0; + 0, 1; + 1, 0; + 1, 1; + 1, 2] +st.shape = [2, 3] +st.values = ['hello', 'world', 'a', 'b', 'c'] +``` + +If `sep` is given, consecutive delimiters are not grouped together and are +deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and +sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty +string, consecutive whitespace are regarded as a single separator, and the +result will contain no empty strings at the startor end if the string has +leading or trailing whitespace. + +Note that the above mentioned behavior matches python's str.split. +END +} diff --git a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt new file mode 100644 index 0000000000..0e8576fb01 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StringSplitV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 8f2a419756..9cda17867b 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -86,7 +86,7 @@ BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) { return &(chunks_[h]); } -bool BFCAllocator::Extend(size_t rounded_bytes) { +bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { size_t available_bytes = memory_limit_ - total_region_allocated_bytes_; // Rounds available_bytes down to the nearest multiple of kMinAllocationSize. available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize; @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { // Try allocating. 
size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = suballocator_->Alloc(32, bytes); + void* mem_addr = suballocator_->Alloc(alignment, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = suballocator_->Alloc(32, bytes); + mem_addr = suballocator_->Alloc(alignment, bytes); } } @@ -261,7 +261,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, } // Try to extend - if (Extend(rounded_bytes)) { + if (Extend(unused_alignment, rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes); if (ptr != nullptr) { return ptr; diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index ba5a3eea3a..52aedb1e9c 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -305,7 +305,8 @@ class BFCAllocator : public VisitableAllocator { // Try to add a new memory region that can satisfy an allocation of // 'rounded_bytes' bytes. Returns true on success and false on // failure. - bool Extend(size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); + bool Extend(size_t alignment, size_t rounded_bytes) + EXCLUSIVE_LOCKS_REQUIRED(lock_); // Returns a pointer to an underlying allocated chunk of size // 'rounded_bytes'. 
diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc index c21a1ea9f2..9028e6298c 100644 --- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc +++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc @@ -102,9 +102,25 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) { EXPECT_EQ(2, shape.dim(0).size()); EXPECT_EQ(1, shape.dim(1).size()); if (node->name() == y->name()) { +#ifdef INTEL_MKL + // if MKL is used, it goes through various additional + // graph rewrite pass. In TF, everytime a graph pass + // happens, "constant" nodes are allocated + // and deallocated. Each allocation calls the + // (FindChunkPtr of BFCAllocator), + // which increments the value of AllocationId. + // Thus AllocationId becomes more than 3 and 4 if + // MKL is used. Now they are 9 and 10 for MKL. + EXPECT_EQ(19, cm->AllocationId(node, 0)); +#else EXPECT_EQ(21, cm->AllocationId(node, 0)); +#endif } else { +#ifdef INTEL_MKL + EXPECT_EQ(20, cm->AllocationId(node, 0)); +#else EXPECT_EQ(22, cm->AllocationId(node, 0)); +#endif } } EXPECT_LE(0, cm->MaxExecutionTime(node)); diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc new file mode 100644 index 0000000000..5d583a8360 --- /dev/null +++ b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc @@ -0,0 +1,53 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/common_runtime/threadpool_device.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { + +#ifdef _OPENMP +TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) { + SessionOptions options; + unsetenv("OMP_NUM_THREADS"); + + ThreadPoolDevice* tp = new ThreadPoolDevice( + options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); + + const int ht = port::NumHyperthreadsPerCore(); + EXPECT_EQ(omp_get_max_threads(), (port::NumSchedulableCPUs() + ht - 1) / ht); +} + +TEST(MKLThreadPoolDeviceTest, TestOmpPreSets) { + SessionOptions options; + setenv("OMP_NUM_THREADS", "314", 1); + + ThreadPoolDevice* tp = new ThreadPoolDevice( + options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); + + EXPECT_EQ(omp_get_max_threads(), 314); +} +#endif // _OPENMP + +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc index 21912236d0..a5d31b75c7 100644 --- a/tensorflow/core/common_runtime/process_util.cc +++ b/tensorflow/core/common_runtime/process_util.cc @@ -16,8 +16,10 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/process_util.h" #ifdef INTEL_MKL +#ifdef _OPENMP #include <omp.h> -#endif +#endif // _OPENMP +#endif // INTEL_MKL #include <string.h> #include "tensorflow/core/lib/core/threadpool.h" @@ -57,7 +59,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { // MKL library executes ops in parallel using OMP threads // Set inter_op conservatively to avoid thread oversubscription that could // lead to severe perf degradations and OMP resource exhaustion - const int mkl_intra_op = omp_get_max_threads(); + int mkl_intra_op = 1; +#ifdef _OPENMP + mkl_intra_op = omp_get_max_threads(); +#endif // _OPENMP CHECK_GE(mkl_intra_op, 1); const int32 mkl_inter_op = std::max( (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2); @@ -68,7 +73,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { #else // Default to using the number of cores available in the process. return port::NumSchedulableCPUs(); -#endif +#endif // INTEL_MKL } thread::ThreadPool* NewThreadPoolFromSessionOptions( diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index f7a07fe503..74a87215e1 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -31,7 +31,11 @@ limitations under the License. 
#include "tensorflow/core/public/session_options.h" #ifdef INTEL_MKL +#ifdef _OPENMP +#include <omp.h> +#endif #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" +#include "tensorflow/core/platform/cpu_info.h" #endif namespace tensorflow { @@ -43,7 +47,26 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options, : LocalDevice(options, Device::BuildDeviceAttributes( name, DEVICE_CPU, memory_limit, locality)), allocator_(allocator), - scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {} + scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) { +#ifdef INTEL_MKL +#ifdef _OPENMP + const char* user_omp_threads = getenv("OMP_NUM_THREADS"); + if (user_omp_threads == nullptr) { + // OMP_NUM_THREADS controls MKL's intra-op parallelization + // Default to available physical cores + const int mkl_intra_op = port::NumSchedulableCPUs(); + const int ht = port::NumHyperthreadsPerCore(); + omp_set_num_threads((mkl_intra_op + ht - 1) / ht); + } else { + uint64 user_val = 0; + if (strings::safe_strtou64(user_omp_threads, &user_val)) { + // Superflous but triggers OpenMP loading + omp_set_num_threads(user_val); + } + } +#endif // _OPENMP +#endif // INTEL_MKL +} ThreadPoolDevice::~ThreadPoolDevice() {} diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc index 1cea1b1462..770a0fcf14 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc @@ -147,7 +147,9 @@ MasterService::Stub::Stub( } MasterService::AsyncService::AsyncService() { - for (int i = 0; i < 10; ++i) { + int method_len = sizeof(grpcMasterService_method_names) / + sizeof(grpcMasterService_method_names[0]); + for (int i = 0; i < method_len; ++i) { AddMethod(new ::grpc::internal::RpcServiceMethod( grpcMasterService_method_names[i], ::grpc::internal::RpcMethod::NORMAL_RPC, nullptr)); diff --git 
a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc index 89f83f9f24..a8508d2d4f 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/grpc_session.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -50,9 +51,14 @@ Status TestCluster::MakeTestCluster(const SessionOptions& options, int n, } for (int i = 0; i < n; ++i) { + string server_file = + strings::StrCat(testing::TensorFlowSrcRoot(), + "/core/distributed_runtime/rpc/grpc_testlib_server"); + if (!options.env->FileExists(server_file).ok()) { + return errors::Internal("Could not find grpc_testlib_server"); + } const std::vector<string> argv( - {strings::StrCat(testing::TensorFlowSrcRoot(), - "/core/distributed_runtime/rpc/grpc_testlib_server"), + {server_file, /* see grpc_testlib_server.cc for flags */ tf_jobs, "--tf_job=localhost", strings::StrCat("--tf_task=", i), strings::StrCat("--num_cpus=", num_cpus), diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index 2c87156dca..2bb4d32d57 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -67,13 +67,8 @@ struct AllocatorStats { // device memory. class Allocator { public: -#ifdef EIGEN_VECTORIZE_AVX512 // Align to 64 byte boundary. static constexpr size_t kAllocatorAlignment = 64; -#else - // Align to 32 byte boundary. 
- static constexpr size_t kAllocatorAlignment = 32; -#endif virtual ~Allocator(); diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc index 3d7920a6e2..4b56d807df 100644 --- a/tensorflow/core/framework/op_gen_lib.cc +++ b/tensorflow/core/framework/op_gen_lib.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/op_gen_lib.h" +#include <algorithm> #include <vector> #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/framework/remote_fused_graph_execute_info.proto b/tensorflow/core/framework/remote_fused_graph_execute_info.proto index eb689ec1e6..10072724d2 100644 --- a/tensorflow/core/framework/remote_fused_graph_execute_info.proto +++ b/tensorflow/core/framework/remote_fused_graph_execute_info.proto @@ -5,7 +5,7 @@ option cc_enable_arenas = true; option java_outer_classname = "RemoteFusedGraphExecuteInfoProto"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -//add go_package externally +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; import "tensorflow/core/framework/graph.proto"; import "tensorflow/core/framework/tensor_shape.proto"; import "tensorflow/core/framework/types.proto"; diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index b613effd18..80e168df97 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -1147,29 +1147,29 @@ TEST(Tensor, FailureToAllocate) { // On the alignment. // -// As of 2015/8, tensorflow::Tensor allocates its buffer with 32-byte +// As of 2018/5, tensorflow::Tensor allocates its buffer with 64-byte // alignment. Tensor::tensor/flat/vec/matrix methods requires the // buffer satisfies Eigen::Aligned (e.g., 16-bytes aligned usually, -// and 32-bytes for AVX). 
Tensor::Slice requires the caller to ensure -// its result is aligned if the caller intends to use those methods. -// In this test case, we simply make sure each slice is 32-byte -// aligned: sizeof(float) * 4 * 2 = 32. +// 32-bytes for AVX, and 64-bytes for AVX512). Tensor::Slice requires +// the caller to ensure its result is aligned if the caller intends +// to use those methods. In this test case, we simply make sure each +// slice is 64-byte aligned: sizeof(float) * 4 * 36 = 576. 576 % 64 = 0. TEST(Tensor, Slice_Basic) { Tensor saved; { // General - Tensor x(DT_FLOAT, TensorShape({10, 4, 34})); + Tensor x(DT_FLOAT, TensorShape({10, 4, 36})); // Fills in known values. for (int i = 0; i < 10; ++i) { x.Slice(i, i + 1).flat<float>().setConstant(i * 1.f); } // A simple slice along dim0. Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 34}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 36}))); auto tx = x.tensor<float, 3>(); auto ty = y.tensor<float, 3>(); for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(ty(i, j, k), 4.0 + i); EXPECT_EQ(&tx(4 + i, j, k), &ty(i, j, k)); } @@ -1186,7 +1186,7 @@ TEST(Tensor, Slice_Basic) { auto tz = z.tensor<float, 3>(); EXPECT_EQ(1, z.dim_size(0)); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(tz(0, j, k), 6.0); } } @@ -1198,16 +1198,16 @@ TEST(Tensor, Slice_Basic) { EXPECT_EQ(1, saved.dim_size(0)); auto tsaved = saved.tensor<float, 3>(); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(tsaved(0, j, k), 6.0); } } } { // Empty - Tensor x(DT_FLOAT, TensorShape({10, 0, 34})); + Tensor x(DT_FLOAT, TensorShape({10, 0, 36})); x.flat<float>().setRandom(); Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 34}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 
36}))); } { diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 72a13d4da7..b9667998d6 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. if (!n->assigned_device_name().empty() && - !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. if (!n->def().device().empty() && - !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } @@ -2865,9 +2865,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } - // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized - // path. The unoptimized path is slow. Thus we dont rewrite the node - // and use default Eigen. But for depth_radius=2, MKL DNN optimized + // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized + // path. The unoptimized path is slow. Thus we dont rewrite the node + // and use default Eigen. But for depth_radius=2, MKL DNN optimized // path is taken, i.e., eigen node is rewritten by MKl DNN node. 
static bool LrnRewrite(const Node* n) { CHECK_NOTNULL(n); @@ -2876,13 +2876,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass { CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN - // and use eigen node instead + // and use eigen node instead if (depth_radius == 2) { return true; } VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" << "case is not optimized by Intel MKL, thus using Eigen op" - << "for LRN " ; + << "for LRN "; return false; } @@ -3015,6 +3015,35 @@ class MklLayoutRewritePass : public GraphOptimizationPass { std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added); + // Helper function used by FixMklMetaDataEdges. Fixes the metadata edge + // pointed by 'e_metadata' corresponding to the data edge 'e_data' in graph + // 'g'. Returns true is fixup was done; otherwise, it returns false. + bool FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g, + const Edge* e_data, const Edge* e_metadata); + + // Are the input Mkl metadata edges for node 'n' in graph 'g' correctly + // connected? If not, then fix them. This is needed because a graph may have + // some input Mkl metadata edges incorrectly setup after node merge and + // rewrite passes. This could happen because GetReversePostOrder function may + // not provide topologically sorted order if a graph contains cycles. The + // function returns true if at least one Mkl metadata edge for node 'n' was + // fixed. Otherwise, it returns false. + // + // Example: + // + // X = MklConv2D(_, _, _) + // Y = MklConv2DWithBias(_, _, _, _, _, _) + // Z = MklAdd(X, Y, DummyMklTensor, Y:1) + // + // For a graph such as shown above, note that 3rd argument of MklAdd contains + // DummyMklTensor. Actually, it should be getting the Mkl metadata from + // MklConv2D op (specifically, X:2). 
This incorrect plumbing could be possible + // (although rare) if the Mkl NodeMerge + NodeRewrite passes visit Z before X + // (possible if X, Y, Z are part of a loop.) This function fixes the Mkl + // metadata edges only - it does not rewrite nodes nor does it modify the Mkl + // data edges (1st and 2nd arguments of MklAdd). + bool FixMklMetaDataEdges(std::unique_ptr<Graph>* g, Node* n); + // Functions specific to operators to copy attributes // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. @@ -4242,6 +4271,92 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { } /////////////////////////////////////////////////////////////////////////////// +// Post-rewrite Mkl metadata fixup pass +/////////////////////////////////////////////////////////////////////////////// +bool MklLayoutRewritePass::FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g, + const Edge* e_data, const Edge* e_metadata) { + if (g == nullptr || e_data == nullptr || e_metadata == nullptr) { + return false; + } + + Node* n_data = e_data->src(); + int n_data_op_slot = e_data->src_output(); + int n_metadata_op_slot = GetTensorMetaDataIndex(n_data_op_slot, + n_data->num_outputs()); + + // If the source of meta edge is a constant node (producing dummy Mkl metadata + // tensor), then we will need to fix. + if (IsConstant(e_metadata->src())) { + Node* e_metadata_dst = e_metadata->dst(); + int e_metadata_in_slot = e_metadata->dst_input(); + CHECK_NOTNULL((*g)->AddEdge(n_data, n_metadata_op_slot, + e_metadata_dst, e_metadata_in_slot)); + + (*g)->RemoveEdge(e_metadata); + return true; + } + + return false; +} + +bool MklLayoutRewritePass::FixMklMetaDataEdges(std::unique_ptr<Graph>* g, + Node* n) { + bool result = false; + + // If graph node is not Mkl node, then return. 
+ DataType T = DT_INVALID; + if (!GetNodeAttr(n->def(), "T", &T).ok() || + !mkl_op_registry::IsMklOp(n->type_string(), T)) { + return result; + } + + // If it is Mkl node, then check if the input edges to this node that carry + // Mkl metadata are linked up correctly with the source node. + + // For Mkl nodes, we generate twice the number of input tensors (n for Mkl + // data tensors + n for Mkl metadata tensors). We need to check for correct + // connection of n metadata tensors only. + int num_data_inputs = n->num_inputs() / 2; + for (int idx = 0; idx < num_data_inputs; idx++) { + // Get the edge connecting input slot with index (idx). + const Edge* e = nullptr; + TF_CHECK_OK(n->input_edge(idx, &e)); + + // If e is control edge, then skip. + if (e->IsControlEdge()) { + continue; + } + + // Check that the source node for edge 'e' is Mkl node. If it is not an Mkl + // node, then we don't need to do anything. + Node* e_src = e->src(); + if (GetNodeAttr(e_src->def(), "T", &T).ok() && + mkl_op_registry::IsMklOp(e_src->type_string(), T)) { + // Source node for edge 'e' is Mkl node. + // Destination node and destination input slot of e is node 'n' and 'idx' + // resp. + CHECK_EQ(e->dst(), n); + CHECK_EQ(e->dst_input(), idx); + + // Let's get edge that carries Mkl metadata corresponding to Mkl data edge + // 'e'. For that, let's first get the input slot of 'n' where the meta + // edge will feed the value. + int e_meta_in_slot = GetTensorMetaDataIndex(e->dst_input(), + n->num_inputs()); + const Edge* e_meta = nullptr; + TF_CHECK_OK(n->input_edge(e_meta_in_slot, &e_meta)); + + // Let's check if we need to fix this meta edge. 
+ if (FixMklMetaDataEdgeIfNeeded(g, e, e_meta)) { + result = true; + } + } + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// // Run function for the pass /////////////////////////////////////////////////////////////////////////////// @@ -4307,6 +4422,25 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) { DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g); + order.clear(); + GetReversePostOrder(**g, &order); // This will give us topological sort. + for (Node* n : order) { + // If node is not an op or it cannot run on CPU device, then skip. + if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) { + continue; + } + if (FixMklMetaDataEdges(g, n)) { + string node_name = n->name(); + string op_name = n->type_string(); + + VLOG(1) << "MklLayoutRewritePass: fixed metadata edges for node " + << node_name << " with op " << op_name; + result = true; + } + } + DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite+Fixup)", + &**g); + return result; } diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 029cdcf94a..7645b4a7f0 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -3519,6 +3519,37 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) { } ///////////////////////////////////////////////////////////////////// +// Post-rewrite fixup pass test + +TEST_F(MklLayoutPassTest, PostRewriteFixUpPass) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Input'}" + "node { name: 'M' op: '_MklInput'}" + "node { name: 'N' op: '_MklInput'}" + "node { name: 'C' op: '_MklConv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NCHW' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 
'SAME' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['A', 'B', 'M', 'N']}" + "node { name: 'D' op: 'Const' " + " attr { key: 'dtype' value { type: DT_UINT8 } }" + " attr { key: 'value' value { " + " tensor { dtype: DT_UINT8 tensor_shape { dim { size: 1 } } " + " int_val: 0 } } } }" + "node { name: 'E' op: '_MklAdd'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['C', 'A', 'D', 'D']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Input);C(_MklConv2D);D(Const);E(_MklAdd);" + "M(_MklInput);N(_MklInput)|A->C;A->E:1;B->C:1;C->E;C:2->E:2;" + "D->E:3;M->C:2;N->C:3"); +} + +///////////////////////////////////////////////////////////////////// static void BM_MklLayoutRewritePass(int iters, int op_nodes) { testing::StopTiming(); diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 6749a7c571..0c02876ac5 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -610,7 +610,6 @@ class SymbolicShapeRefiner { } }; - // Compute the shape of the tensors outputed by node 'node' at output port // 'port_index' as the union of shape1 and shape2. 
ShapeHandle OutputAsUnion(const NodeDef* node, int port_index, ShapeHandle shape1, ShapeHandle shape2) { diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 1b18087cdf..8ca726df0b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -679,6 +679,7 @@ cc_library( deps = [ ":constant_folding", ":graph_optimizer", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -780,7 +781,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:scoped_allocator_ops_op_lib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 4dde7ed1b4..03e36a7b9c 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace grappler { @@ -200,8 +201,7 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, } } if (optimizable) { - VLOG(2) << "Optimizing fused batch norm node " << node.DebugString() - << std::endl; + VLOG(1) << "Optimizing fused batch norm node " << node.DebugString(); AddBatchNormNodes(optimized_graph, node); continue; } diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc index 66c4aff3e3..a7757d1361 100644 --- a/tensorflow/core/kernels/as_string_op.cc +++ b/tensorflow/core/kernels/as_string_op.cc @@ -73,6 +73,7 @@ class AsStringOp : public OpKernel { } switch (dtype) { case DT_INT8: + case DT_INT16: case DT_INT32: strings::Appendf(&format_, "d"); break; @@ -129,6 +130,7 @@ class AsStringOp : public OpKernel { ENCODE_TYPE(DT_FLOAT, float, format_); ENCODE_TYPE(DT_DOUBLE, double, format_); ENCODE_TYPE(DT_INT8, int8, format_); + ENCODE_TYPE(DT_INT16, int16, format_); case (DT_BOOL): { const auto& input_flat = input_tensor->flat<bool>(); for (int i = 0; i < input_flat.size(); ++i) { diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index 14d889e8e3..49b90e855b 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -33,52 +33,41 @@ class ClipOp : public OpKernel { const Tensor& in0 = ctx->input(0); const Tensor& in1 = ctx->input(1); const Tensor& in2 = ctx->input(2); + OP_REQUIRES(ctx, (in0.shape() == in1.shape() || + TensorShapeUtils::IsScalar(in1.shape())) && + (in0.shape() == in2.shape() || + TensorShapeUtils::IsScalar(in2.shape())), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. 
", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + + Tensor* out = nullptr; + OP_REQUIRES_OK( + ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); + if (out->NumElements() == 0) return; // Nothing to do for empty output auto in0_flat = in0.flat<T>(); auto in1_flat = in1.flat<T>(); auto in2_flat = in2.flat<T>(); + auto out_flat = out->flat<T>(); const Device& d = ctx->eigen_device<Device>(); - Tensor* out = nullptr; - OP_REQUIRES_OK( - ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); - auto out_flat = out->flat<T>(); if (in1.shape() == in2.shape()) { if (in0.shape() == in1.shape()) { functor::TernaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. ", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::UnaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat, out_flat); } } else { if (in0.shape() == in1.shape()) { - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. ", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryLeftClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { - OP_REQUIRES(ctx, - (in0.shape() == in2.shape() && - TensorShapeUtils::IsScalar(in1.shape())), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. 
", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryRightClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat, out_flat); } diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc index 9a3b2303a3..17a85d9773 100644 --- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc @@ -57,6 +57,7 @@ struct DenseUpdate<GPUDevice, T, SUB> { template struct functor::DenseUpdate<GPUDevice, T, ADD>; \ template struct functor::DenseUpdate<GPUDevice, T, SUB>; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); +TF_CALL_int32(DEFINE_GPU_KERNELS); TF_CALL_int64(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS diff --git a/tensorflow/core/kernels/gather_functor.cc b/tensorflow/core/kernels/gather_functor.cc index e6fefe643b..5cd8e04927 100644 --- a/tensorflow/core/kernels/gather_functor.cc +++ b/tensorflow/core/kernels/gather_functor.cc @@ -37,6 +37,7 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_functor_gpu.cu.cc b/tensorflow/core/kernels/gather_functor_gpu.cu.cc index 39b6924d74..4563fc6353 100644 --- a/tensorflow/core/kernels/gather_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_functor_gpu.cu.cc @@ -31,6 +31,7 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); +TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc index 
7e5a9e1ec5..4e53291b7f 100644 --- a/tensorflow/core/kernels/gather_nd_op.cc +++ b/tensorflow/core/kernels/gather_nd_op.cc @@ -228,6 +228,8 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); +TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); @@ -239,6 +241,8 @@ TF_CALL_complex128(DECLARE_GPU_SPECS); // Registration of the GPU implementations. #define REGISTER_GATHER_ND_GPU(type) REGISTER_GATHER_ND_ALL_INDICES(GPU, type) +TF_CALL_int32(REGISTER_GATHER_ND_GPU); +TF_CALL_int64(REGISTER_GATHER_ND_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); TF_CALL_complex64(REGISTER_GATHER_ND_GPU); TF_CALL_complex128(REGISTER_GATHER_ND_GPU); diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc index b03efc684f..da8d2e9e3c 100644 --- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc @@ -119,6 +119,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> { DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); +TF_CALL_int32(DEFINE_GPU_SPECS); +TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_op.cc b/tensorflow/core/kernels/gather_op.cc index ef332ebee3..094504d6b9 100644 --- a/tensorflow/core/kernels/gather_op.cc +++ b/tensorflow/core/kernels/gather_op.cc @@ -153,6 +153,7 @@ TF_CALL_uint64(REGISTER_GATHER_CPU); // Registration of the GPU implementations. 
#define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type) +TF_CALL_int64(REGISTER_GATHER_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU); TF_CALL_complex64(REGISTER_GATHER_GPU); TF_CALL_complex128(REGISTER_GATHER_GPU); diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 5eeb23d810..31d1b949ef 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -14,6 +14,7 @@ limitations under the License. #include <limits> #include <vector> +#include <unordered_map> #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -590,8 +591,8 @@ class MklConcatOp : public OpKernel { const int N = input_tensors.size(); // Get Tensor shapes. - std::vector<MklDnnShape> input_shapes(N); - GetMklShapeList(context, "values", &input_shapes); + std::vector<MklDnnShape> mkl_input_shapes(N); + GetMklShapeList(context, "values", &mkl_input_shapes); const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM) ? MklGetInput(context, 0) @@ -610,19 +611,14 @@ class MklConcatOp : public OpKernel { int i = 0; bool invoke_eigen = false; bool are_all_mkl_inputs = true, are_all_tf_inputs = true; - const TensorShape expected_shape = input_shapes[0].IsMklTensor() - ? input_shapes[0].GetTfShape() - : input_tensors[0].shape(); + const TensorShape expected_shape = mkl_input_shapes[0].IsMklTensor() + ? mkl_input_shapes[0].GetTfShape() + : input_tensors[0].shape(); size_t expected_dims = expected_shape.dims(); if (concat_dim < 0) concat_dim = expected_dims + concat_dim; - for (auto& s : input_shapes) { - if (s == expected_shape) { - ++i; - continue; - } - + for (auto& s : mkl_input_shapes) { TensorShape s_shape = s.IsMklTensor() ? 
s.GetTfShape() : input_tensors[i].shape(); size_t s_dims = s_shape.dims(); @@ -665,21 +661,14 @@ class MklConcatOp : public OpKernel { // Call Eigen library if (invoke_eigen) { - TensorShapeList tf_input_shapes; - i = 0; - for (auto& s : input_shapes) { - TensorShape s_shape = - s.IsMklTensor() ? s.GetTfShape() : input_tensors[i].shape(); - tf_input_shapes.push_back(s_shape); - ++i; - } - CallEigenVersion(context, input_tensors, tf_input_shapes); + CallEigenVersion(context, input_tensors, mkl_input_shapes); return; } memory::dims dst_dims; + if (are_all_mkl_inputs) - dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape()); + dst_dims = TFShapeToMklDnnDims(mkl_input_shapes[0].GetTfShape()); else // When all the inputs are in Tensorflow format, we don't know // what is the input data format. In that case, we just use @@ -689,26 +678,61 @@ class MklConcatOp : public OpKernel { std::vector<memory::primitive_desc> srcs_pd; std::vector<MklDnnData<T>> srcs(N, MklDnnData<T>(&cpu_engine)); int64 dst_concat_dim_size = 0; - for (int k = 0; k < N; k++) { - bool is_mkl_tensor = input_shapes[k].IsMklTensor(); - memory::dims src_dims; - - // Same comment as dst_dims for src_dims. - src_dims = (is_mkl_tensor) - ? TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) - : TFShapeToMklDnnDims(input_tensors[k].shape()); - - dst_concat_dim_size += src_dims[concat_dim]; - auto src_md = - is_mkl_tensor ? input_shapes[k].GetMklLayout() : - // It does not matter what data format we use here - // (NHWC or NCHW). We just need to ensure that output - // of Concat uses same data format as input. 
- memory::desc(src_dims, MklDnnType<T>(), memory::format::nchw); - - srcs[k].SetUsrMem(src_md, &input_tensors[k]); - auto src_mpd = srcs[k].GetUsrMemPrimDesc(); - srcs_pd.push_back(src_mpd); + + bool isMklReorderNeeded = false; + memory::format mkl_common_format = memory::format::any; + if (are_all_mkl_inputs) { + mkl_common_format = + FindMklCommonFormat(mkl_input_shapes, concat_dim, + &isMklReorderNeeded, &dst_concat_dim_size); + + if (!isMklReorderNeeded) { + // All MKL tensors have a same format. Reorder is not needed. + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + auto src_md = mkl_input_shapes[k].GetMklLayout(); + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + auto src_mpd = srcs[k].GetUsrMemPrimDesc(); + srcs_pd.push_back(src_mpd); + } + } else { + // MKL tensors have different formats. + // Reorder them to most common format. + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + auto src_dims = TFShapeToMklDnnDims( + mkl_input_shapes[k].GetTfShape()); + auto src_md = mkl_input_shapes[k].GetMklLayout(); + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + + if (src_md.data.format != mkl_common_format) + src_md = memory::desc(src_dims, MklDnnType<T>(), + mkl_common_format); + + srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine)); + } + } + } else { // All TF inputs + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + memory::dims src_dims = TFShapeToMklDnnDims(input_tensors[k].shape()); + dst_concat_dim_size += src_dims[concat_dim]; + + // It does not matter what data format to be used (NHWC versus NCHW). + // We just need to ensure that output uses same data format as inputs. 
+ auto src_md = + memory::desc(src_dims, MklDnnType<T>(), memory::format::nchw); + + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + auto src_mpd = srcs[k].GetUsrMemPrimDesc(); + srcs_pd.push_back(src_mpd); + } } dst_dims[concat_dim] = dst_concat_dim_size; @@ -718,25 +742,33 @@ class MklConcatOp : public OpKernel { if (are_all_mkl_inputs) { // Since we are passing a specific format for destination, // we need to have dst_dims in MklDnn order (NCHW). - auto orig_tf_format = input_shapes[0].GetTfDataFormat(); + auto orig_tf_format = mkl_input_shapes[0].GetTfDataFormat(); dst_dims_in_nchw = MklDnnDimsInNCHW( dst_dims, MklDnnDataFormatToTFDataFormat(orig_tf_format)); - // We will set the output in the same format as input to avoid layout - // conversions. - // Currently we are setting dst format same as input format. - // See if we can make this choice in a better way. + // Set the output format same as the most common format of inputs + // to avoid layout conversions. dst_md = memory::desc( - dst_dims_in_nchw, MklDnnType<T>(), - (memory::format)input_shapes[0].GetMklLayout().data.format); + dst_dims_in_nchw, MklDnnType<T>(), mkl_common_format); } else { - // Again, format does not matter here. We just need to make it same as - // input format. + // All inputs are TF tensors. + // Set the output format same as input format (nchw). dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nchw); } std::vector<primitive::at> inputs; - for (int k = 0; k < input_tensors.size(); k++) - inputs.push_back(srcs[k].GetOpMem()); + std::vector<primitive> net; + if (isMklReorderNeeded) { + for (int k = 0; k < input_tensors.size(); k++) { + if (input_tensors[k].NumElements() > 0) { + srcs[k].CheckReorderToOpMem(srcs_pd[k], &net); + } + } + } + for (int k = 0; k < input_tensors.size(); k++) { + if (input_tensors[k].NumElements() > 0) { + inputs.push_back(srcs[k].GetOpMem()); + } + } // If all inputs are in MKL format, then meaning of concat_dim needs to // change. 
Value of concat_dim is tied to input Tensorflow data format @@ -745,7 +777,8 @@ class MklConcatOp : public OpKernel { // But ifinput tensors are in NHWC order, then semantics need to change. // E.g., if we are concatinating over Channel (dimension 3 for NHWC), // then since MklDnn order is NCHW, concat_dim needs to be 1. - if (are_all_mkl_inputs) concat_dim = input_shapes[0].TfDimIdx(concat_dim); + if (are_all_mkl_inputs) + concat_dim = mkl_input_shapes[0].TfDimIdx(concat_dim); auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd); @@ -758,7 +791,7 @@ class MklConcatOp : public OpKernel { dnn_shape_dst.SetMklLayout(&dst_pd); dnn_shape_dst.SetElemType(MklDnnType<T>()); dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw, - input_shapes[0].GetTfDataFormat()); + mkl_input_shapes[0].GetTfDataFormat()); tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T))); } else { dnn_shape_dst.SetMklTensor(false); @@ -773,7 +806,6 @@ class MklConcatOp : public OpKernel { dst.SetUsrMem(dst_md, dst_tensor); auto concat_op = concat(concat_pd, inputs, dst.GetOpMem()); - std::vector<primitive> net; net.push_back(concat_op); stream(stream::kind::eager).submit(net).wait(); } catch (mkldnn::error& e) { @@ -787,15 +819,27 @@ class MklConcatOp : public OpKernel { } void CallEigenVersion(OpKernelContext* context, const OpInputList& values, - const TensorShapeList& input_shapes) { - CHECK_EQ(values.size(), input_shapes.size()); + const MklDnnShapeList& mkl_input_shapes) { + CHECK_EQ(values.size(), mkl_input_shapes.size()); std::vector<Tensor> converted_values; - for (int i = 0; i < input_shapes.size(); i++) - converted_values.push_back(values[i]); + TensorShapeList tf_input_shapes; + for (int i = 0; i < mkl_input_shapes.size(); i++) { + if (mkl_input_shapes[i].IsMklTensor()) { + // do conversion from MKL to TF + Tensor tmp_tensor = + ConvertMklToTF<T>(context, values[i], mkl_input_shapes[i]); + converted_values.push_back(tmp_tensor); + 
tf_input_shapes.push_back(mkl_input_shapes[i].GetTfShape()); + } else { + // no conversion since it is TF tensor already + converted_values.push_back(values[i]); + tf_input_shapes.push_back(values[i].shape()); + } + } // Call Eigen concat. - eigen_concat_op_.Compute(context, converted_values, input_shapes); + eigen_concat_op_.Compute(context, converted_values, tf_input_shapes); // Set output Mkl tensor for this op. MklDnnShape dnn_shape_output; @@ -812,6 +856,55 @@ class MklConcatOp : public OpKernel { output_tensor->flat<uint8>().data(), output_tensor->flat<uint8>().size() * sizeof(uint8)); } + + // This method finds the most commom format accross all MKL inputs + // Inputs: + // 1. input_shapes: shapes of input (MKL) tensors. + // 2. concat_dim: concat dimension. + // Outputs: + // 1. is_reorder_needed is set to true if inputs have difference formats + // It is set to false otherwise. + // 2. concat_dim_size is the size of concat_dim. + // Return: + // return the common MKL format. + memory::format FindMklCommonFormat(const MklDnnShapeList& input_shapes, + int concat_dim, bool* is_reorder_needed, int64* concat_dim_size) { + *is_reorder_needed = false; + *concat_dim_size = 0; + std::unordered_map<int, int> occurrence_map; + if (input_shapes.size() == 0) + return memory::format::any; + + // Compute ocurrences of each format of all inputs. + for (int k=0; k <input_shapes.size(); k++) { + auto src_dims = TFShapeToMklDnnDims(input_shapes[k].GetTfShape()); + *concat_dim_size += src_dims[concat_dim]; + int fmt = static_cast<int>( + input_shapes[k].GetMklLayout().data.format); + occurrence_map[fmt] += 1; + } + + if (occurrence_map.size() == 1) { + // this means that all inputs have a same format + // return it with is_reorder_needed set false. + return static_cast<memory::format>( + input_shapes[0].GetMklLayout().data.format); + } + + // Input tensors have different formats. Thus, reorder is needed. 
+ // We pick up the most common format to minimize the total + // number of input reorder. + memory::format commonest_format = memory::format::any; + int max_occurrence = 0; + *is_reorder_needed = true; + for (auto item : occurrence_map) { + if (item.second > max_occurrence) { + commonest_format = static_cast<memory::format>(item.first); + max_occurrence = item.second; + } + } + return commonest_format; + } }; #endif diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc index c1da0ded1d..f857be6c32 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc @@ -18,6 +18,7 @@ limitations under the License. // bias. #ifdef INTEL_MKL +#ifdef INTEL_MKL_ML #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS @@ -264,4 +265,5 @@ class MklConv2DCustomBackpropBiasOp : public OpKernel { TF_CALL_float(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS } /* namespace tensorflow */ +#endif /* INTEL_MKL_ML */ #endif /* INTEL_MKL */ diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index 279167aba2..c0dfed7d7d 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -199,13 +199,15 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase<T> { CHECK_NOTNULL(pool_params); CHECK_NOTNULL(dnn_data_input); TensorShape input_tensor_shape = input_tensor.shape(); - memory::desc input_md = + if (input_tensor.NumElements() != 0) { + memory::desc input_md = input_mkl_shape.IsMklTensor() ? 
input_mkl_shape.GetMklLayout() : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, this->data_format_tf_), MklDnnType<T>(), this->data_format_mkldnn_); - dnn_data_input->SetUsrMem(input_md, &input_tensor); + dnn_data_input->SetUsrMem(input_md, &input_tensor); + } this->InitMklPoolParameters(context, pool_params, input_mkl_shape, input_tensor_shape); } diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 43c5b29509..e1fc2ea128 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -292,6 +292,7 @@ TF_CALL_string(REGISTER_SCATTER_ND_CPU); REGISTER_SCATTER_ND_UPDATE_GPU(type); \ REGISTER_SCATTER_ND_GPU(type); +TF_CALL_int32(REGISTER_SCATTER_ND_ALL_GPU); // TODO(b/66916790): Support half types in ScatterNd. TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU); @@ -306,6 +307,8 @@ TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU); #define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \ REGISTER_SCATTER_ND_UPDATE(type, SYCL); +TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL); +TF_CALL_int32(REGISTER_SCATTER_ND_UPDATE_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ADD_SUB_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL); #undef REGISTER_SCATTER_ND_ADD_SUB_SYCL @@ -576,6 +579,7 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); // TODO(b/66916790): Support half types in ScatterNd. 
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc index a3c21edc15..08b657f4c3 100644 --- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc @@ -170,6 +170,7 @@ struct ScatterNdFunctor<GPUDevice, T, Index, op, IXDIM> { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc index bb0129fa6f..634f9ba887 100644 --- a/tensorflow/core/kernels/scoped_allocator_ops_test.cc +++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc @@ -216,8 +216,13 @@ TEST_F(ScopedAllocatorConcatOpTest, Success3) { } TEST_F(ScopedAllocatorConcatOpTest, Reshape) { - MakeOp({2, 2, 2}, DT_DOUBLE, true, "test", 120, 2); - ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}}); + MakeOp({2, 2, 4}, DT_DOUBLE, true, "test", 120, 2); + + // The elements of the third parameter to ExecOp must be multiples of + // Allocator::kAllocatorAlignment in size. If they are not, the backing + // tensor allocated by PrepOp will have too many elements and reshaping + // will fail. + ExecOp(DT_DOUBLE, 120, {{2, 4}, {2, 4}}); } TEST_F(ScopedAllocatorConcatOpTest, NoReshapeAttr) { diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 7796bf3587..d65692a552 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -16,6 +16,14 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ + +// This file requires the following include because it uses CudaAtomicMax: +// #include "tensorflow/core/util/cuda_kernel_helper.h" + +// Unfortunately we can't add the #include, since it breaks compilation for +// non-GPU targets. This only breaks in clang, because it's more strict for +// template code and CudaAtomicMax is used in template context. + // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -130,4 +138,4 @@ struct Highest { } // namespace functor } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index a1f9667b78..866c5dcd52 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -1490,7 +1490,7 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute( #endif // TENSORFLOW_USE_LIBXSMM -// Here is a an overview of the SparseMatMul code. Note that we assume that the +// Here is an overview of the SparseMatMul code. Note that we assume that the // left matrix is sparse. // // The matrix "left" is divided into a grid with blocksize of (M, KL). Each diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc index 4c2b312c34..26ab72f12e 100644 --- a/tensorflow/core/kernels/string_split_op.cc +++ b/tensorflow/core/kernels/string_split_op.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { @@ -43,6 +44,63 @@ std::vector<string> Split(const string& str, const string& delimiter, return char_vector; } +std::vector<string> SplitV2(const string& str, StringPiece sep, int maxsplit) { + // This SplitV2 method matches the behavior of python's str.split: + // If sep is given, consecutive delimiters are not grouped together + // and are deemed to delimit empty strings (for example, '1,,2'.split(',') + // returns ['1', '', '2']). The sep argument may consist of multiple + // characters (for example, '1<>2<>3'.split('<>') returns ['1', '2', '3']). + // Splitting an empty string with a specified separator returns ['']. + // + // If sep is not specified or is None, a different splitting algorithm is + // applied: runs of consecutive whitespace are regarded as a single + // separator, and the result will contain no empty strings at the start or + // end if the string has leading or trailing whitespace. Consequently, + // splitting an empty string or a string consisting of just whitespace + // with a None separator returns []. + + std::vector<string> result; + + StringPiece text(str); + if (maxsplit == 0) { + result.emplace_back(std::string(text)); + return result; + } + + if (sep.empty()) { + StringPiece token; + // Remove leading whitespaces. 
+ str_util::RemoveLeadingWhitespace(&text); + int split = 0; + while (str_util::ConsumeNonWhitespace(&text, &token)) { + result.emplace_back(std::string(token)); + str_util::RemoveLeadingWhitespace(&text); + ++split; + if (maxsplit > 0 && split == maxsplit) { + result.emplace_back(std::string(text)); + return result; + } + } + return result; + } + auto p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); + int split = 0; + while (p != text.end()) { + StringPiece token = text.substr(0, p - text.begin()); + result.emplace_back(std::string(token)); + text.remove_prefix(token.size()); + text.remove_prefix(sep.size()); + ++split; + if (maxsplit > 0 && split == maxsplit) { + result.emplace_back(std::string(text)); + return result; + } + p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); + } + result.emplace_back(std::string(text)); + return result; +} + } // namespace class StringSplitOp : public OpKernel { @@ -122,6 +180,78 @@ class StringSplitOp : public OpKernel { bool skip_empty_; }; +class StringSplitV2Op : public OpKernel { + public: + explicit StringSplitV2Op(OpKernelConstruction* context) + : OpKernel(context), maxsplit_(-1) { + OP_REQUIRES_OK(context, context->GetAttr("maxsplit", &maxsplit_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_tensor->shape()), + errors::InvalidArgument("input must be a vector, got shape: ", + input_tensor->shape().DebugString())); + + const auto input_vec = input_tensor->vec<string>(); + const int64 batch_size = input_vec.dimension(0); + + const Tensor* sep_tensor; + OP_REQUIRES_OK(ctx, ctx->input("sep", &sep_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(sep_tensor->shape()), + errors::InvalidArgument("sep must be a scalar, got shape: ", + sep_tensor->shape().DebugString())); + const auto sep_vec = sep_tensor->flat<string>(); + StringPiece 
sep(sep_vec(0)); + std::vector<string> tokens; + // Guess that we'll be unpacking a handful of tokens per example. + static constexpr int kReserveSize = 4; + tokens.reserve(batch_size * kReserveSize); + + int64 output_size = 0; + int64 max_num_entries = 0; + std::vector<int64> num_indices(batch_size); + for (int64 i = 0; i < batch_size; ++i) { + std::vector<string> parts = SplitV2(input_vec(i), sep, maxsplit_); + int64 n_entries = parts.size(); + num_indices[i] = n_entries; + output_size += n_entries; + max_num_entries = std::max(max_num_entries, n_entries); + tokens.insert(tokens.end(), parts.begin(), parts.end()); + } + + Tensor* sp_indices_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({output_size, 2}), + &sp_indices_t)); + Tensor* sp_tokens_t; + OP_REQUIRES_OK( + ctx, ctx->allocate_output(1, TensorShape({output_size}), &sp_tokens_t)); + Tensor* sp_shape_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({2}), &sp_shape_t)); + + auto sp_indices = sp_indices_t->matrix<int64>(); + auto sp_tokens = sp_tokens_t->vec<string>(); + auto sp_shape = sp_shape_t->vec<int64>(); + sp_shape(0) = batch_size; + sp_shape(1) = max_num_entries; + size_t c = 0; + for (size_t i = 0; i < batch_size; ++i) { + for (size_t j = 0; j < num_indices[i]; ++j) { + sp_indices(c, 0) = i; + sp_indices(c, 1) = j; + sp_tokens(c) = tokens[c]; + ++c; + } + } + } + + private: + int maxsplit_; +}; + REGISTER_KERNEL_BUILDER(Name("StringSplit").Device(DEVICE_CPU), StringSplitOp); +REGISTER_KERNEL_BUILDER(Name("StringSplitV2").Device(DEVICE_CPU), + StringSplitV2Op); } // namespace tensorflow diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc index 6e4d100b04..6e589c8d1c 100644 --- a/tensorflow/core/ops/candidate_sampling_ops.cc +++ b/tensorflow/core/ops/candidate_sampling_ops.cc @@ -145,12 +145,15 @@ REGISTER_OP("ComputeAccidentalHits") int64 num_true; TF_RETURN_IF_ERROR(c->GetAttr("num_true", &num_true)); - // Validate 
true_classes. + // Validate true_classes, must be a matrix. ShapeHandle true_classes; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &true_classes)); DimensionHandle unused; TF_RETURN_IF_ERROR( c->WithValue(c->Dim(true_classes, 1), num_true, &unused)); + // Validate sampled_candidates, must be a vector. + ShapeHandle sampled_candidates; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &sampled_candidates)); // All three outputs are the same shape. ShapeHandle v = c->Vector(InferenceContext::kUnknownDim); diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 15e0ca8af9..9dca5f53ce 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -218,7 +218,17 @@ REGISTER_OP("MapAndBatchDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Use index from the end to retrieve the Input shapes, + // so that to avoid guessing the length of "other_arguments". + // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars. + shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); + + return shape_inference::ScalarShape(c); + }); REGISTER_OP("MapAndBatchDatasetV2") .Input("input_dataset: variant") @@ -231,7 +241,17 @@ REGISTER_OP("MapAndBatchDatasetV2") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Use index from the end to retrieve the Input shapes, + // so that to avoid guessing the length of "other_arguments". 
+ // batch_size, num_parallel_calls, and drop_remainder are 0-D scalars. + shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); + + return shape_inference::ScalarShape(c); + }); REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index d949e70c66..87f4991134 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -454,7 +454,9 @@ REGISTER_OP("DrawBoundingBoxes") DimensionHandle unused; TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 2), 4, &unused)); - return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); + // The rank of the input image (rank = 4) has already been restricted + // above, and the output is of the same shape as the input. + return shape_inference::UnchangedShape(c); }); // -------------------------------------------------------------------------- diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 1740fa152c..b3487122e2 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1084,7 +1084,7 @@ REGISTER_OP("UnsortedSegmentProd") .Input("segment_ids: Tindices") .Input("num_segments: Tnumsegments") .Output("output: T") - .Attr("T: realnumbertype") + .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") .Attr("Tnumsegments: {int32,int64} = DT_INT32") .SetShapeFn(UnsortedSegmentReductionShapeFn); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index fc60e807b9..41efa49ce3 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1453,6 +1453,7 @@ REGISTER_OP("QuantizedReluX") ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, 
&unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index 1d5c743a56..4423062362 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -78,7 +78,7 @@ REGISTER_OP("ReduceJoin") REGISTER_OP("AsString") .Input("input: T") .Output("output: string") - .Attr("T: {int32, int64, complex64, float, double, bool, int8}") + .Attr("T: {int8, int16, int32, int64, complex64, float, double, bool}") .Attr("precision: int = -1") .Attr("scientific: bool = false") .Attr("shortest: bool = false") @@ -134,6 +134,24 @@ REGISTER_OP("StringSplit") return Status::OK(); }); +REGISTER_OP("StringSplitV2") + .Input("input: string") + .Input("sep: string") + .Output("indices: int64") + .Output("values: string") + .Output("shape: int64") + .Attr("maxsplit: int = -1") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + + c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, 2)); + c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); + c->set_output(2, c->Vector(2)); + return Status::OK(); + }); + REGISTER_OP("StringStrip") .Input("input: string") .Output("output: string") diff --git a/tensorflow/core/platform/cpu_info.cc b/tensorflow/core/platform/cpu_info.cc index 99de364042..e9da3d8e32 100644 --- a/tensorflow/core/platform/cpu_info.cc +++ b/tensorflow/core/platform/cpu_info.cc @@ -344,5 +344,28 @@ int CPUModelNum() { #endif } +int CPUIDNumSMT() { +#ifdef PLATFORM_IS_X86 + // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration + // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A) + // Section: Detecting Hardware Multi-threads Support and Topology + // Uses CPUID Leaf 11 to enumerate 
system topology on Intel x86 architectures + // Other cases not supported + uint32 eax, ebx, ecx, edx; + // Check if system supports Leaf 11 + GETCPUID(eax, ebx, ecx, edx, 0, 0); + if (eax >= 11) { + // 1) Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0 + // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11, + // ECX=0):ECX[15:8] is 1 + GETCPUID(eax, ebx, ecx, edx, 11, 0); + if (ebx != 0 && ((ecx & 0xff00) >> 8) == 1) { + return 1 << (eax & 0x1f); // 2 ^ SMT_Mask_Width + } + } +#endif // PLATFORM_IS_X86 + return 0; +} + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index b5be7e8b54..175c9ae8b1 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -35,6 +35,10 @@ namespace port { // software can change it dynamically. int NumSchedulableCPUs(); +// Returns an estimate of the number of hyperthreads per physical core +// on the CPU +int NumHyperthreadsPerCore(); + // Mostly ISA related features that we care about enum CPUFeature { // Do not change numeric assignments. @@ -107,6 +111,9 @@ int CPUModelNum(); // Returns nominal core processor cycles per second of each processor. double NominalCPUFrequency(); +// Returns num of hyperthreads per physical core +int CPUIDNumSMT(); + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index ae81f9b5b3..a319ccbdbe 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -71,6 +71,8 @@ def pyx_library( name = filename + "_cython_translation", srcs = [filename], outs = [filename.split(".")[0] + ".cpp"], + # Optionally use PYTHON_BIN_PATH on Linux platforms so that python 3 + # works. Windows has issues with cython_binary so skip PYTHON_BIN_PATH. 
cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)", tools = ["@cython//:cython_binary"] + pxd_srcs, ) diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 72c12318ca..ff4b4436bb 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -115,18 +115,17 @@ class LibHDFS { const char* kLibHdfsDso = "libhdfs.so"; #endif char* hdfs_home = getenv("HADOOP_HDFS_HOME"); - if (hdfs_home == nullptr) { - status_ = errors::FailedPrecondition( - "Environment variable HADOOP_HDFS_HOME not set"); - return; - } - string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); - status_ = TryLoadAndBind(path.c_str(), &handle_); - if (!status_.ok()) { - // try load libhdfs.so using dynamic loader's search path in case - // libhdfs.so is installed in non-standard location - status_ = TryLoadAndBind(kLibHdfsDso, &handle_); + if (hdfs_home != nullptr) { + string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); + status_ = TryLoadAndBind(path.c_str(), &handle_); + if (status_.ok()) { + return; + } } + + // Try to load the library dynamically in case it has been installed + // to a in non-standard location. + status_ = TryLoadAndBind(kLibHdfsDso, &handle_); } Status status_; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 8e316472fe..708f32ba80 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -74,6 +74,11 @@ int NumSchedulableCPUs() { return kDefaultCores; } +int NumHyperthreadsPerCore() { + static const int ht_per_core = tensorflow::port::CPUIDNumSMT(); + return (ht_per_core > 0) ? 
ht_per_core : 1; +} + void* AlignedMalloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) return memalign(minimum_alignment, size); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 522a9d84fd..cb1fd09dbb 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 8 +#define TF_MINOR_VERSION 9 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index dffc965b14..90b6533690 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -42,6 +42,7 @@ limitations under the License. #ifndef INTEL_MKL_ML #include "mkldnn.hpp" +#include "tensorflow/core/lib/core/stringpiece.h" using mkldnn::engine; using mkldnn::memory; @@ -712,15 +713,48 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } #else +using mkldnn::stream; +template <typename T> class MklDnnData; + template <typename T> inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, const MklDnnShape& mkl_shape) { Tensor output_tensor; - TensorShape output_shape; - - TF_CHECK_OK( - Status(error::Code::UNIMPLEMENTED, "Unimplemented conversion function")); - + try { + if (!mkl_shape.IsMklTensor()) + return mkl_tensor; // return input since it is already TF tensor + + TensorShape output_shape = mkl_shape.GetTfShape();; + + // Allocate output tensor. 
+ context->allocate_temp(DataTypeToEnum<T>::v(), + output_shape, &output_tensor); + + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData<T> input(&cpu_engine); + + // Get Mkl layout of input tensor. + auto input_mkl_md = mkl_shape.GetMklLayout(); + auto output_tf_md = mkl_shape.GetTfLayout(); + auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); + input.SetUsrMem(input_mkl_md, &mkl_tensor); + + // reorder + if (input.IsReorderNeeded(output_tf_pd)) { + std::vector<primitive> net; + CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, &output_tensor, &net), + true); + stream(stream::kind::eager).submit(net).wait(); + } else { + // If not, just forward input tensor to output tensor. + CHECK(output_tensor.CopyFrom(mkl_tensor, output_shape)); + } + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + LOG(FATAL) << "Operation received an exception: " << error_msg; + } return output_tensor; } #endif @@ -1843,7 +1877,7 @@ class FactoryKeyCreator { template <typename T> void AddAsKey(const T data) { auto buffer = reinterpret_cast<const char *>(&data); - Append(absl::string_view(buffer, sizeof(T))); + Append(StringPiece(buffer, sizeof(T))); } std::string GetKey() { @@ -1854,8 +1888,8 @@ class FactoryKeyCreator { string key_; const char delimiter = 'x'; const int kMaxKeyLength = 256; - void Append(absl::string_view s) { - key_.append(string(s)); + void Append(StringPiece s) { + key_.append(s.ToString()); key_.append(1, delimiter); } }; diff --git a/tensorflow/docs_src/community/groups.md b/tensorflow/docs_src/community/groups.md index d92f5775fa..0b07d413da 100644 --- a/tensorflow/docs_src/community/groups.md +++ b/tensorflow/docs_src/community/groups.md @@ -1,17 +1,38 @@ # User Groups -TensorFlow has communities around the world. +TensorFlow has communities around the world. 
[Submit your community!](https://docs.google.com/forms/d/e/1FAIpQLSc_RQIUYtVgLLihzATaO_WUXkEyBDE_OoRoOXYDPmBEvHuEBA/viewform) ## Asia -* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ -* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ -* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ +* [TensorFlow China community](https://www.tensorflowers.cn) +* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) +* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) +* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) * [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) +* [TensorFlow Philippines Community](https://www.facebook.com/groups/TensorFlowPH/) +* [TensorFlow and Deep Learning Singapore](https://www.meetup.com/TensorFlow-and-Deep-Learning-Singapore/) +* [TensorFlow India](https://www.facebook.com/tensorflowindia) ## Europe * [TensorFlow Barcelona](https://www.meetup.com/Barcelona-Machine-Learning-Meetup/) * [TensorFlow Madrid](https://www.meetup.com/TensorFlow-Madrid/) +* [Tensorflow Belgium](https://www.meetup.com/TensorFlow-Belgium) +* [TensorFlow x Rome Meetup](https://www.meetup.com/it-IT/TensorFlow-x-Rome-Meetup) +* [TensorFlow London](https://www.meetup.com/TensorFlow-London/) +* [TensorFlow Edinburgh](https://www.meetup.com/tensorflow-edinburgh/) + +## America + +* [TensorFlow Buenos Aires](https://www.meetup.com/TensorFlow-Buenos-Aires/) + + +## Oceania +* [Melbourne TensorFlow Meetup](https://www.meetup.com/Melbourne-TensorFlow-Meetup) + + +## Africa + +* [TensorFlow Tunis Meetup](https://www.meetup.com/fr-FR/TensorFlow-Tunis-Meetup/) diff --git a/tensorflow/docs_src/get_started/eager.md b/tensorflow/docs_src/get_started/eager.md index f08ac74425..bbb25e20c6 100644 --- a/tensorflow/docs_src/get_started/eager.md +++ b/tensorflow/docs_src/get_started/eager.md @@ 
-1,3 +1,3 @@ # Get Started with Eager Execution -[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.8.0/samples/core/get_started/eager.ipynb) +[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.9.0/samples/core/get_started/eager.ipynb) diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index 55579d52fb..232d2f1547 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -10,9 +10,9 @@ course prior to diving into TensorFlow documentation: TensorFlow is a tool for machine learning. While it contains a wide range of functionality, TensorFlow is mainly designed for deep neural network models. -The easiest way to get started with TensorFlow is using Eager Execution. +The easiest way to get started with TensorFlow is by using Eager Execution. - * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. + * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. TensorFlow provides many APIs. The remainder of this section focuses on the Estimator API which provide scalable, high-performance models. 
See the diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 1abd840ab3..2901848745 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 52a2a3f8a6..55bc0f64e7 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 1256fb99c4..637231da12 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: <dependency> <groupId>org.tensorflow</groupId> <artifactId>tensorflow</artifactId> - <version>1.8.0</version> + <version>1.9.0-rc0</version> </dependency> ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project 
that uses TensorFlow: <dependency> <groupId>org.tensorflow</groupId> <artifactId>tensorflow</artifactId> - <version>1.8.0</version> + <version>1.9.0-rc0</version> </dependency> </dependencies> </project> @@ -124,12 +124,12 @@ instead: <dependency> <groupId>org.tensorflow</groupId> <artifactId>libtensorflow</artifactId> - <version>1.8.0</version> + <version>1.9.0-rc0</version> </dependency> <dependency> <groupId>org.tensorflow</groupId> <artifactId>libtensorflow_jni_gpu</artifactId> - <version>1.8.0</version> + <version>1.9.0-rc0</version> </dependency> ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,13 +175,13 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. 
Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc0.zip). 3. Extract this .zip file. - +__Note__: The native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime, which is included in the [Visual C++ 2015 Redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145) package. ### Validate the installation @@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -<pre><b>javac -cp libtensorflow-1.8.0.jar HelloTF.java</b></pre> +<pre><b>javac -cp libtensorflow-1.9.0-rc0.jar HelloTF.java</b></pre> ### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -<pre><b>java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF</b></pre> +<pre><b>java -cp libtensorflow-1.9.0-rc0.jar:. -Djava.library.path=./jni HelloTF</b></pre> And the following command line executes the `HelloTF` program on Windows: -<pre><b>java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF</b></pre> +<pre><b>java -cp libtensorflow-1.9.0-rc0.jar;. -Djava.library.path=jni HelloTF</b></pre> If the program prints <tt>Hello from <i>version</i></tt>, you've successfully installed TensorFlow for Java and are ready to use the API. 
If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 0ed8160027..c8d706cf3c 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -339,9 +339,7 @@ Docker will download the TensorFlow binary image the first time you launch it. #### GPU support -Prior to installing TensorFlow with GPU support, ensure that your system meets all -[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container -with NVidia GPU support, enter a command of the following format: +To launch a Docker container with NVidia GPU support, enter a command of the following format (this [does not require any local CUDA installation](https://github.com/nvidia/nvidia-docker/wiki/CUDA#requirements)): <pre> $ <b>nvidia-docker run -it</b> <i>-p hostPort:containerPort TensorFlowGPUImage</i> @@ -438,7 +436,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: <pre> (tensorflow)$ <b>pip install --ignore-installed --upgrade \ - https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl</b></pre> + https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre> <a name="ValidateYourInstallation"></a> ## Validate your installation @@ -517,7 +515,7 @@ on your system: from source. To use the TensorFlow binaries, version 3.5 or higher is required. See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. -* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA +* [GPU drivers](http://nvidia.com/drivers) that support your version of the CUDA Toolkit. * The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This library provides advanced profiling support. To install this library, @@ -684,14 +682,14 @@ This section documents the relevant values for Linux installations. 
CPU only: <pre> -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp27-none-linux_x86_64.whl </pre> GPU support: <pre> -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp27-none-linux_x86_64.whl </pre> Note that GPU support requires the NVIDIA hardware and software described in @@ -703,14 +701,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only: <pre> -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl </pre> GPU support: <pre> -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp34-cp34m-linux_x86_64.whl </pre> Note that GPU support requires the NVIDIA hardware and software described in @@ -722,14 +720,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only: <pre> -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl +https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp35-cp35m-linux_x86_64.whl </pre> GPU support: <pre> -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp35-cp35m-linux_x86_64.whl </pre> @@ -741,14 +739,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only: <pre> -https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl 
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp36-cp36m-linux_x86_64.whl </pre> GPU support: <pre> -https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl +https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp36-cp36m-linux_x86_64.whl </pre> diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 29a867a9e3..9d01271c5a 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows: <pre> $ <b>pip3 install --upgrade \ - https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl</b></pre> + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl</b></pre> If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command: <pre> $ <b>sudo pip3 install --upgrade \ - https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl</b> </pre> + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl</b> </pre> If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7: <pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \ - https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl</b></pre> + https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl</b></pre> <a name="ValidateYourInstallation"></a> @@ -522,7 +522,7 @@ The value you specify depends on your Python version. 
<pre> -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl </pre> @@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any. <pre> -https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl +https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl </pre> diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 5ba522b436..dc6c1e36fc 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -81,7 +81,7 @@ or [macOS](#PrepareMac) -<a name="#PrepareLinux"></a> +<a name="PrepareLinux"></a> ## Prepare environment for Linux Before building TensorFlow on Linux, install the following build @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.8.0 on Linux: +for TensorFlow 1.9.0rc0 on Linux: <pre> -$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl</b> +$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc0-py2-none-any.whl</b> </pre> ## Validate your installation @@ -373,9 +373,9 @@ The build and installation problems you encounter typically depend on the operating system. 
See the "Common installation problems" section of one of the following guides: - * @{$install_linux#CommonInstallationProblems$Installing TensorFlow on Linux} - * @{$install_mac#CommonInstallationProblems$Installing TensorFlow on Mac OS} - * @{$install_windows#CommonInstallationProblems$Installing TensorFlow on Windows} + * @{$install_linux#common_installation_problems$Installing TensorFlow on Linux} + * @{$install_mac#common_installation_problems$Installing TensorFlow on Mac OS} + * @{$install_windows#common_installation_problems$Installing TensorFlow on Windows} Beyond the errors documented in those two guides, the following table notes additional errors specific to building TensorFlow. Note that we @@ -433,6 +433,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** <table> <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr> +<tr><td>tensorflow-1.9.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.11.0</td><td>N/A</td><td>N/A</td></tr> +<tr><td>tensorflow_gpu-1.9.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.11.0</td><td>7</td><td>9</td></tr> <tr><td>tensorflow-1.8.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.10.0</td><td>N/A</td><td>N/A</td></tr> <tr><td>tensorflow_gpu-1.8.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.9.0</td><td>7</td><td>9</td></tr> <tr><td>tensorflow-1.7.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.10.0</td><td>N/A</td><td>N/A</td></tr> @@ -456,6 +458,7 @@ Stack Overflow and specify the `tensorflow` tag. 
**Mac** <table> <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr> +<tr><td>tensorflow-1.9.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.11.0</td><td>N/A</td><td>N/A</td></tr> <tr><td>tensorflow-1.8.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.10.1</td><td>N/A</td><td>N/A</td></tr> <tr><td>tensorflow-1.7.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.10.1</td><td>N/A</td><td>N/A</td></tr> <tr><td>tensorflow-1.6.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr> @@ -472,6 +475,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows** <table> <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr> +<tr><td>tensorflow-1.9.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr> +<tr><td>tensorflow_gpu-1.9.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr> <tr><td>tensorflow-1.8.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr> <tr><td>tensorflow_gpu-1.8.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr> <tr><td>tensorflow-1.7.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr> diff --git a/tensorflow/docs_src/mobile/linking_libs.md b/tensorflow/docs_src/mobile/linking_libs.md index cf0db59021..efef5dd0da 100644 --- a/tensorflow/docs_src/mobile/linking_libs.md +++ b/tensorflow/docs_src/mobile/linking_libs.md @@ -27,7 +27,7 @@ called `libandroid_tensorflow_inference_java.jar`. There are three ways to include this functionality in your program: 1. 
Include the jcenter AAR which contains it, as in this - [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/build.gradle#L59-L65) + [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/tfmobile/build.gradle#L59-L65) 2. Download the nightly precompiled version from [ci.tensorflow.org](http://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/). diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 8b22c04d87..2b84dbb973 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -105,8 +105,8 @@ inline constants so everything’s in one file. To handle the conversion, you’ll need the `freeze_graph.py` script, that’s held in [`tensorflow/python/tools/freeze_graph.py`](https://www.tensorflow.org/code/tensorflow/python/tools/freeze_graph.py). You’ll run it like this: - bazel build tensorflow/tools:freeze_graph - bazel-bin/tensorflow/tools/freeze_graph \ + bazel build tensorflow/python/tools:freeze_graph + bazel-bin/tensorflow/python/tools/freeze_graph \ --input_graph=/tmp/model/my_graph.pb \ --input_checkpoint=/tmp/model/model.ckpt-1000 \ --output_graph=/tmp/frozen_graph.pb \ diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 2fea02d861..c97f74139c 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -227,8 +227,8 @@ of 30.0f, and an 8-bit array, the quantized values represent the following: <table> <tr><th>Quantized</th><th>Float</th></tr> <tr><td>0</td><td>-10.0</td></tr> - <tr><td>255</td><td>30.0</td></tr> <tr><td>128</td><td>10.0</td></tr> + <tr><td>255</td><td>30.0</td></tr> </table> <figcaption> <b>Table 2</b>: Example quantized value range diff --git a/tensorflow/docs_src/programmers_guide/estimators.md
b/tensorflow/docs_src/programmers_guide/estimators.md index c4aae1d9d6..b13b47184d 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -21,18 +21,17 @@ Note: TensorFlow also includes a deprecated `Estimator` class at Estimators provide the following benefits: -* You can run Estimators-based models on a local host or on a +* You can run Estimator-based models on a local host or on a distributed multi-server environment without changing your model. - Furthermore, you can run Estimators-based models on CPUs, GPUs, + Furthermore, you can run Estimator-based models on CPUs, GPUs, or TPUs without recoding your model. * Estimators simplify sharing implementations between model developers. -* You can develop a state of the art model with high-level intuitive code, +* You can develop a state of the art model with high-level intuitive code. In short, it is generally much easier to create models with Estimators than with the low-level TensorFlow APIs. -* Estimators are themselves built on tf.layers, which +* Estimators are themselves built on @{tf.layers}, which simplifies customization. -* Estimators build the graph for you. In other words, you don't have to - build the graph. +* Estimators build the graph for you. * Estimators provide a safe distributed training loop that controls how and when to: * build the graph @@ -57,7 +56,7 @@ the "plumbing" for you. That is, pre-made Estimators create and manage pre-made Estimators let you experiment with different model architectures by making only minimal code changes. @{tf.estimator.DNNClassifier$`DNNClassifier`}, for example, is a pre-made Estimator class that trains classification models -through dense, feed-forward neural networks. +based on dense, feed-forward neural networks. ### Structure of a pre-made Estimators program @@ -79,7 +78,7 @@ of the following four steps: an input function: def input_fn(dataset): - ... 
# manipulate dataset, extracting feature names and the label + ... # manipulate dataset, extracting the feature dict and the label return feature_dict, label (See @{$programmers_guide/datasets} for full details.) @@ -96,13 +95,13 @@ of the following four steps: population = tf.feature_column.numeric_column('population') crime_rate = tf.feature_column.numeric_column('crime_rate') median_education = tf.feature_column.numeric_column('median_education', - normalizer_fn='lambda x: x - global_education_mean') + normalizer_fn=lambda x: x - global_education_mean) 3. **Instantiate the relevant pre-made Estimator.** For example, here's a sample instantiation of a pre-made Estimator named `LinearClassifier`: # Instantiate an estimator, passing the feature columns. - estimator = tf.estimator.Estimator.LinearClassifier( + estimator = tf.estimator.LinearClassifier( feature_columns=[population, crime_rate, median_education], ) diff --git a/tensorflow/docs_src/programmers_guide/feature_columns.md b/tensorflow/docs_src/programmers_guide/feature_columns.md index 845194fe0e..90f5c53a17 100644 --- a/tensorflow/docs_src/programmers_guide/feature_columns.md +++ b/tensorflow/docs_src/programmers_guide/feature_columns.md @@ -528,10 +528,10 @@ suggested by the following snippet: categorical_column = ... # Create any categorical column # Represent the categorical column as an embedding column. -# This means creating a one-hot vector with one element for each category. +# This means creating an embedding vector lookup table with one element for each category. 
embedding_column = tf.feature_column.embedding_column( categorical_column=categorical_column, - dimension=dimension_of_embedding_vector) + dimension=embedding_dimensions) ``` @{$programmers_guide/embedding$Embeddings} is a significant topic within machine diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 03e60972aa..86f5204ec3 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -21,7 +21,8 @@ from __future__ import division from __future__ import print_function import os -import urllib + +from six.moves.urllib.request import urlretrieve import tensorflow as tf @@ -38,9 +39,7 @@ FEATURE_KEYS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] def maybe_download_iris_data(file_name, download_url): """Downloads the file and returns the number of data.""" if not os.path.exists(file_name): - raw = urllib.urlopen(download_url).read() - with open(file_name, 'w') as f: - f.write(raw) + urlretrieve(download_url, file_name) # The first line is a comma-separated string. The first one is the number of # total data in the file. diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 5602775b62..a5224fbda0 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -10955,7 +10955,7 @@ func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistorted // SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. // // value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. +// supplied image within this range. 
// If not specified, defaults to <f:0.05 f:1 > func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { @@ -18098,9 +18098,10 @@ func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_val } // Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` -// // if < 0, `scale * features` otherwise. // +// Assumes weights to have zero mean and variance 1.0 / fan_in. +// // See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) func Selu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { @@ -21625,7 +21626,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. The default value is the color @@ -24018,7 +24019,7 @@ func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistort // SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. // // value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. +// supplied image within this range. 
// If not specified, defaults to <f:0.05 f:1 > func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { @@ -24714,8 +24715,7 @@ type DecodeProtoV2Attr func(optionalAttr) // If not specified, defaults to "local://" func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr { return func(m optionalAttr) { - m["descriptor_source"] = value - } + m["descriptor_source"] = value } } // DecodeProtoV2MessageFormat sets the optional message_format attribute to value. diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index debd95fc62..9b171f66ec 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -376,9 +376,6 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, } } // op annotations - op_class.add_annotation( - Annotation::Create("Generated", "javax.annotation") - .attributes("value = \"TensorFlow Java Op Generator\"")); if (endpoint.deprecated()) { op_class.add_annotation(Annotation::Create("Deprecated")); string explanation; @@ -415,8 +412,12 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, SourceFileWriter writer(op_file.get()); std::list<Type> dependencies; CollectOpDependencies(op, mode, &dependencies); - writer.Write(kLicense).EndLine().BeginType(op_class, PUBLIC | FINAL, - &dependencies, &op_javadoc); + writer.Write(kLicense) + .EndLine() + .Write("// This class has been generated, DO NOT EDIT!") + .EndLine() + .EndLine() + .BeginType(op_class, PUBLIC | FINAL, &dependencies, &op_javadoc); if (!op.optional_attributes().empty()) { RenderOptionsClass(op, op_class, &writer); } diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc index 181fd4c5e3..941ab2699c 100644 --- a/tensorflow/java/src/gen/cc/op_specs.cc +++ b/tensorflow/java/src/gen/cc/op_specs.cc @@ -96,6 +96,7 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& 
arg_def, bool* iterable_out) { *iterable_out = true; visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); } + Type type = Type::Wildcard(); if (arg_def.type() != DataType::DT_INVALID) { // resolve type from DataType diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index b2e6c60021..bd97b181ff 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -196,11 +196,11 @@ def implicit_val_and_grad(f): # TODO(cais): Remove calls to tf.constant() once the gradients functions # accept lists and np.ndarrays. - def grad_fn(*args): + def grad_fn(*args, **kwds): """Computes the gradient of the wrapped function.""" this_tape = tape.push_new_tape() try: - end_node = f(*args) + end_node = f(*args, **kwds) if end_node is None: raise ValueError("Cannot differentiate a function that returns None; " "did you forget to return a value from {}?".format( diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 9cd17e0407..20522098b0 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -978,7 +978,10 @@ py_test( size = "large", srcs = ["keras_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], + tags = [ + "no_windows", + "notsan", + ], deps = [ ":keras", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index 7cdf840c97..b18212cfcd 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -156,7 +156,7 @@ def _loss_smaller(best_eval_result, current_eval_result): return best_eval_result[default_key] > current_eval_result[default_key] -def _verify_compre_fn_args(compare_fn): +def _verify_compare_fn_args(compare_fn): """Verifies compare_fn arguments.""" args = set(util.fn_args(compare_fn)) if 'best_eval_result' not in args: @@ -265,7 +265,7 @@ class BestExporter(Exporter): self._compare_fn = 
compare_fn if self._compare_fn is None: raise ValueError('`compare_fn` must not be None.') - _verify_compre_fn_args(self._compare_fn) + _verify_compare_fn_args(self._compare_fn) self._saved_model_exporter = _SavedModelExporter( name, serving_input_receiver_fn, assets_extra, as_text) diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index 035c7c148c..a6cefdece2 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -136,11 +136,13 @@ def numpy_input_fn(x, values in `x` have same shape). ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict. ValueError: if x or y is an empty dict. - TypeError: `x` is not a dict or array, or if `shuffle` is not bool. + TypeError: `x` is not a dict or array. + ValueError: if 'shuffle' is not provided or a bool. """ if not isinstance(shuffle, bool): - raise TypeError('shuffle must be explicitly set as boolean; ' - 'got {}'.format(shuffle)) + raise ValueError('shuffle must be provided and explicitly set as boolean ' + '(it is recommended to set it as True for training); ' + 'got {}'.format(shuffle)) def input_fn(): """Numpy input function.""" diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 92d057e25d..81b201cc5c 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -286,8 +286,9 @@ class NumpyIoTest(test.TestCase): x = np.arange(32, 36) y = np.arange(4) with self.test_session(): - with self.assertRaisesRegexp(TypeError, - 'shuffle must be explicitly set as boolean'): + with self.assertRaisesRegexp(ValueError, + 'shuffle must be provided and explicitly ' + 'set as boolean'): # Default shuffle is None. 
numpy_io.numpy_input_fn(x, y) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index 938e244fb3..57f8e5fd6a 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -68,15 +68,16 @@ def pandas_input_fn(x, Raises: ValueError: if `x` already contains a column with the same name as `y`, or if the indexes of `x` and `y` don't match. - TypeError: `shuffle` is not bool. + ValueError: if 'shuffle' is not provided or a bool. """ if not HAS_PANDAS: raise TypeError( 'pandas_input_fn should not be called without pandas installed') if not isinstance(shuffle, bool): - raise TypeError('shuffle must be explicitly set as boolean; ' - 'got {}'.format(shuffle)) + raise ValueError('shuffle must be provided and explicitly set as boolean ' + '(it is recommended to set it as True for training); ' + 'got {}'.format(shuffle)) x = x.copy() if y is not None: diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index e5912a3b28..dcecf6dd61 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -70,8 +70,9 @@ class PandasIoTest(test.TestCase): return x, _ = self.makeTestDataFrame() y_noindex = pd.Series(np.arange(-32, -28)) - with self.assertRaisesRegexp(TypeError, - 'shuffle must be explicitly set as boolean'): + with self.assertRaisesRegexp(ValueError, + 'shuffle must be provided and explicitly ' + 'set as boolean'): # Default shuffle is None pandas_io.pandas_input_fn(x, y_noindex) diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index 8e2ec83020..51a61adb21 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -250,7 +250,7 @@ class 
_PandasFeedFn(object): num_epochs=None): if len(placeholders) != len(dataframe.columns) + 1: raise ValueError("Expected {} placeholders; got {}.".format( - len(dataframe.columns), len(placeholders))) + len(dataframe.columns) + 1, len(placeholders))) self._index_placeholder = placeholders[0] self._col_placeholders = placeholders[1:] self._dataframe = dataframe diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index c80af08fba..2f439f765e 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -70,7 +70,7 @@ def _convert_tensor(x): return x -def _any_variable_initalized(): +def _any_variable_initialized(): """Check if any variable has been initialized in the Keras model. Returns: @@ -511,7 +511,7 @@ def model_to_estimator(keras_model=None, keras_model_fn, model_dir=model_dir, config=config) # Check if we need to call get_weights: - if _any_variable_initalized(): + if _any_variable_initialized(): keras_weights = keras_model.get_weights() # Warn if config passed to estimator tries to update GPUOptions. 
If a # session has already been created, the GPUOptions passed to the first diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py index 6688a84130..5e094ae92b 100644 --- a/tensorflow/python/estimator/keras_test.py +++ b/tensorflow/python/estimator/keras_test.py @@ -31,10 +31,10 @@ from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.keras import backend as K from tensorflow.python.keras import testing_utils from tensorflow.python.keras.applications import mobilenet from tensorflow.python.keras.optimizers import SGD +from tensorflow.python.ops.parsing_ops import gen_parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -146,13 +146,13 @@ def randomize_io_type(array, name): def multi_inputs_multi_outputs_model(): a = keras.layers.Input(shape=(16,), name='input_a') b = keras.layers.Input(shape=(16,), name='input_b') - m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m') + m = keras.layers.Input(shape=(8,), dtype='string', name='input_m') dense = keras.layers.Dense(8, name='dense_1') a_2 = dense(a) - # Apply a mask - s_2 = keras.layers.Lambda(lambda k: - K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2]) + # Read m + m_2 = keras.layers.Lambda(gen_parsing_ops.string_to_number)(m) + s_2 = keras.layers.Lambda(lambda k: k[0] * k[1])([m_2, a_2]) b_2 = dense(b) merged = keras.layers.concatenate([s_2, b_2], name='merge') c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged) @@ -372,13 +372,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): def train_input_fn(): input_dict = {'input_a': a_train, 'input_b': b_train, - 'input_m': input_m_train > 0} + 'input_m': input_m_train.astype(np.str)} 
output_dict = {'dense_2': c_train, 'dense_3': d_train} return input_dict, output_dict def eval_input_fn(): input_dict = {'input_a': a_test, 'input_b': b_test, - 'input_m': input_m_test > 0} + 'input_m': input_m_test.astype(np.str)} output_dict = {'dense_2': c_test, 'dense_3': d_test} return input_dict, output_dict diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 2d6925d1a8..af5d709f7e 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -1389,7 +1389,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 3 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testLoopWithVecAnd4D(self): @@ -1413,7 +1413,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 2 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testBinaryOpSecondPort(self): diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index e487f583be..f608dea430 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -93,6 +93,8 @@ def selu(x): - To be used together with the initialization "lecun_normal". - To be used together with the dropout variant "AlphaDropout". 
+ References: + - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) """ alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 70b6a8431a..9f91368e5b 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -724,15 +724,6 @@ class TensorBoard(Callback): for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) - if self.write_grads: - grads = model.optimizer.get_gradients(model.total_loss, weight) - - def is_indexed_slices(grad): - return type(grad).__name__ == 'IndexedSlices' - - grads = [grad.values if is_indexed_slices(grad) else grad - for grad in grads] - tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) @@ -759,6 +750,18 @@ class TensorBoard(Callback): assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) + if self.write_grads: + for weight in layer.trainable_weights: + mapped_weight_name = weight.name.replace(':', '_') + grads = model.optimizer.get_gradients(model.total_loss, weight) + + def is_indexed_slices(grad): + return type(grad).__name__ == 'IndexedSlices' + + grads = [grad.values if is_indexed_slices(grad) else grad + for grad in grads] + tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) + if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index b355f4a269..5062a26580 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -653,6 +653,8 @@ class KerasCallbacksTest(test.TestCase): model.add( keras.layers.Dense( NUM_HIDDEN, 
input_dim=INPUT_DIM, activation='relu')) + # non_trainable_weights: moving_variance, moving_mean + model.add(keras.layers.BatchNormalization()) model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax')) model.compile( loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index a4cd017d60..1c9135982e 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -123,7 +123,7 @@ class Network(base_layer.Layer): # Entries are unique. Includes input and output layers. self._layers = [] - # Used in symbolic mode only, only in conjonction with graph-networks + # Used in symbolic mode only, only in conjunction with graph-networks self._outbound_nodes = [] self._inbound_nodes = [] diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py index 6a94986b9c..7e82db028b 100644 --- a/tensorflow/python/keras/engine/saving_test.py +++ b/tensorflow/python/keras/engine/saving_test.py @@ -482,7 +482,7 @@ class TestWholeModelSaving(test.TestCase): with h5py.File(fname, 'r') as h5file: num_names_arrays = len([attr for attr in h5file['model_weights'].attrs if attr.startswith('layer_names')]) - # The chunking of layer names array should have happend. + # The chunking of layer names array should have happened. self.assertGreater(num_names_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) @@ -527,7 +527,7 @@ class TestWholeModelSaving(test.TestCase): num_weight_arrays = len( [attr for attr in h5file['model_weights']['nested_model'].attrs if attr.startswith('weight_names')]) - # The chunking of layer names array should have happend. + # The chunking of layer names array should have happened. 
self.assertGreater(num_weight_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 89c1f1a40f..fce6cbdb7a 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -409,11 +410,13 @@ class Model(Network): else: if sample_weight_mode == 'temporal': sample_weights.append(array_ops.placeholder_with_default( - [[1.]], shape=[None, None], name=name + '_sample_weights')) + constant_op.constant([[1.]], dtype=K.floatx()), + shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: sample_weights.append(array_ops.placeholder_with_default( - [1.], shape=[None], name=name + '_sample_weights')) + constant_op.constant([1.], dtype=K.floatx()), + shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index 2ecbff3a1c..e8838cd3bc 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -732,7 +732,7 @@ def slice_arrays(arrays, indices, contiguous=True): """Slices batches out of provided arrays (workaround for eager tensors). 
Unfortunately eager tensors don't have the same slicing behavior as - Numpy arrays (they folow the same slicing behavior as symbolic TF tensors), + Numpy arrays (they follow the same slicing behavior as symbolic TF tensors), hence we cannot use `generic_utils.slice_arrays` directly and we have to implement this workaround based on `concat`. This has a performance cost. diff --git a/tensorflow/python/keras/initializers_test.py b/tensorflow/python/keras/initializers_test.py index a54d6da839..c519e194bd 100644 --- a/tensorflow/python/keras/initializers_test.py +++ b/tensorflow/python/keras/initializers_test.py @@ -71,7 +71,7 @@ class KerasInitializersTest(test.TestCase): stddev=1, seed=126), tensor_shape, - target_mean=0., target_std=None, target_max=2) + target_mean=0., target_max=2, target_min=-2) def test_constant(self): tensor_shape = (5, 6, 4) @@ -83,49 +83,49 @@ class KerasInitializersTest(test.TestCase): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(3. / fan_in) + std = np.sqrt(1. / fan_in) self._runner(keras.initializers.lecun_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_glorot_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_he_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / fan_in) + std = np.sqrt(2. 
/ fan_in) self._runner(keras.initializers.he_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_lecun_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(1. / fan_in) + std = np.sqrt(1. / fan_in) self._runner(keras.initializers.lecun_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_glorot_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_he_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / fan_in) + std = np.sqrt(2. 
/ fan_in) self._runner(keras.initializers.he_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_orthogonal(self): tensor_shape = (20, 20) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 5061825d38..f60064ed63 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function import copy +import sys import types as python_types +import warnings import numpy as np @@ -714,6 +716,7 @@ class Lambda(Layer): return self.mask def get_config(self): + module = self.function.__module__ if isinstance(self.function, python_types.LambdaType): function = generic_utils.func_dump(self.function) function_type = 'lambda' @@ -721,21 +724,26 @@ class Lambda(Layer): function = self.function.__name__ function_type = 'function' + output_shape_module = None if isinstance(self._output_shape, python_types.LambdaType): output_shape = generic_utils.func_dump(self._output_shape) output_shape_type = 'lambda' + output_shape_module = self._output_shape.__module__ elif callable(self._output_shape): output_shape = self._output_shape.__name__ output_shape_type = 'function' + output_shape_module = self._output_shape.__module__ else: output_shape = self._output_shape output_shape_type = 'raw' config = { 'function': function, + 'module': module, 'function_type': function_type, 'output_shape': output_shape, 'output_shape_type': output_shape_type, + 'output_shape_module': output_shape_module, 'arguments': self.arguments } base_config = super(Lambda, self).get_config() @@ -745,8 +753,16 @@ class Lambda(Layer): def from_config(cls, config, custom_objects=None): config = config.copy() globs = globals() + module = config.pop('module', None) + if module in sys.modules: + globs.update(sys.modules[module].__dict__) + elif module is not None: + # Note: we don't know 
the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. ' + 'It may cause errors.'.format(module) + , UserWarning) if custom_objects: - globs = dict(list(globs.items()) + list(custom_objects.items())) + globs.update(custom_objects) function_type = config.pop('function_type') if function_type == 'function': # Simple lookup in custom objects @@ -760,6 +776,14 @@ class Lambda(Layer): else: raise TypeError('Unknown function type:', function_type) + output_shape_module = config.pop('output_shape_module', None) + if output_shape_module in sys.modules: + globs.update(sys.modules[output_shape_module].__dict__) + elif output_shape_module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. ' + 'It may cause errors.'.format(output_shape_module) + , UserWarning) output_shape_type = config.pop('output_shape_type') if output_shape_type == 'function': # Simple lookup in custom objects diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index c616d8f24f..e6e45902a8 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -144,5 +144,19 @@ class CheckpointingTests(test.TestCase): model.load_weights(save_prefix) self.assertEqual(12., self.evaluate(beta1_power)) +class TestModelBackend(test.TestCase): + + def test_model_backend_float64_use_cases(self): + # Test case for GitHub issue 19318 + floatx = keras.backend.floatx() + keras.backend.set_floatx('float64') + + x = keras.Input((5,)) + y = keras.layers.Dense(1)(x) + model = keras.models.Model(x, y) + model.compile('rmsprop', 'mse') + + keras.backend.set_floatx(floatx) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py index 9d54add264..94ed8ebd31 100644 --- 
a/tensorflow/python/kernel_tests/as_string_op_test.py +++ b/tensorflow/python/kernel_tests/as_string_op_test.py @@ -130,6 +130,16 @@ class AsStringOpTest(test.TestCase): result = output.eval(feed_dict={input_: int_inputs_}) self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testHalfInt(self): + s = lambda strs: [x.decode("ascii") for x in strs] + + with self.test_session(): + input_ = array_ops.placeholder(dtypes.int16) + int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max] + output = string_ops.as_string(input_) + result = output.eval(feed_dict={input_: int_inputs_}) + self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testBool(self): bool_inputs_ = [False, True] s = lambda strs: [x.decode("ascii") for x in strs] diff --git a/tensorflow/python/kernel_tests/betainc_op_test.py b/tensorflow/python/kernel_tests/betainc_op_test.py index 08b03f8518..16fdedac41 100644 --- a/tensorflow/python/kernel_tests/betainc_op_test.py +++ b/tensorflow/python/kernel_tests/betainc_op_test.py @@ -172,7 +172,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [gx_s.shape], tf_gout_t, gx_s.shape) - print("betainc gradient err = %g " % err) + tf_logging.info("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) # Test broadcast gradient @@ -181,7 +181,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [()], tf_gout_t, ga_s.shape) - print("betainc gradient err = %g " % err) + tf_logging.info("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index e08123b041..fb52d10475 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -18,9 
+18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.platform import test @@ -414,6 +417,16 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) + def testClipByValueEmptyTensor(self): + # Test case for GitHub issue 19337 + zero = array_ops.placeholder(dtype=dtypes.float32, shape=None) + x = clip_ops.clip_by_value(zero, zero, zero) + y = clip_ops.clip_by_value(zero, 1.0, 1.0) + z = clip_ops.clip_by_value(zero, zero, 1.0) + w = clip_ops.clip_by_value(zero, 1.0, zero) + with self.test_session(use_gpu=True) as sess: + sess.run([x, y, z, w], feed_dict={zero: np.zeros((7, 0))}) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 8699fd5b25..80ba7dafc9 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -312,8 +312,8 @@ class Conv2DTest(test.TestCase): expected_values = self.evaluate(expected_results) computed_values = self.evaluate(computed_results) for e_value, c_value in zip(expected_values, computed_values): - print("expected = ", e_value) - print("actual = ", c_value) + tf_logging.info("expected = ", e_value) + tf_logging.info("actual = ", c_value) self.assertAllClose( e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4) @@ -337,8 +337,8 @@ class Conv2DTest(test.TestCase): for i in range(len(tensors)): conv = tensors[i] value = values[i] - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) tol = 
1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -547,8 +547,8 @@ class Conv2DTest(test.TestCase): # "values" consists of two tensors for two backprops value = self.evaluate(conv) self.assertShapeEqual(value, conv) - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) self.assertArrayNear(expected, value.flatten(), err) def _CompareBackpropInput(self, input_sizes, filter_sizes, output_sizes, @@ -723,8 +723,8 @@ class Conv2DTest(test.TestCase): data_format=data_format) value = self.evaluate(conv) self.assertShapeEqual(value, conv) - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) self.assertArrayNear(expected, value.flatten(), 1e-5) def _CompareBackFilter(self, input_sizes, filter_sizes, output_sizes, @@ -912,8 +912,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - print("expected = ", value_2) - print("actual = ", value) + tf_logging.info("expected = ", value_2) + tf_logging.info("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) # Testing for backprops @@ -965,8 +965,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - print("expected = ", value_2) - print("actual = ", value) + tf_logging.info("expected = ", value_2) + tf_logging.info("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): @@ -1178,7 +1178,7 @@ class Conv2DTest(test.TestCase): # since fp16 numerical gradients are too imprecise. 
err = np.fabs(jacob_t - reference_jacob_t).max() - print("conv_2d gradient error = ", err) + tf_logging.info("conv_2d gradient error = ", err) self.assertLess(err, 0.002) def testInputGradientValidPaddingStrideOne(self): @@ -1546,7 +1546,7 @@ class DepthwiseConv2DTest(test.TestCase): conv = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv) - print("value = ", value) + tf_logging.info("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1668,7 +1668,7 @@ class SeparableConv2DTest(test.TestCase): conv = array_ops.transpose(conv, [0, 2, 3, 1]) value = sess.run(conv) - print("value = ", value) + tf_logging.info("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1826,7 +1826,7 @@ class Conv2DBenchmark(test.Benchmark): wall_time = time.time() - start self.report_benchmark( name="conv_stack_iter_%d" % iter_index, wall_time=wall_time) - print("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) + tf_logging.info("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) def GetInceptionFwdTest(input_size, filter_size, stride, padding, diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py index 91ebe8de99..58e2a8ac2a 100644 --- a/tensorflow/python/kernel_tests/gather_nd_op_test.py +++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py @@ -197,7 +197,21 @@ class GatherNdTest(test.TestCase): self.assertEqual(None, shape.ndims) self.assertEqual(None, shape[0].value) - def testBadIndices(self): + def testBadIndicesCPU(self): + with self.test_session(use_gpu=False): + params = [0, 1, 2] + indices = [[[0], [7]]] # Make this one higher rank + gather_nd = array_ops.gather_nd(params, indices) + with self.assertRaisesOpError( + r"flat indices\[1, :\] = \[7\] does not index into param " + r"\(shape: \[3\]\)"): + gather_nd.eval() + + def 
_disabledTestBadIndicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [0, 1, 2] indices = [[[0], [7]]] # Make this one higher rank @@ -207,7 +221,21 @@ class GatherNdTest(test.TestCase): r"\(shape: \[3\]\)"): gather_nd.eval() - def testBadIndicesWithSlices(self): + def testBadIndicesWithSlicesCPU(self): + with self.test_session(use_gpu=False): + params = [[0, 1, 2]] + indices = [[[0], [0], [1]]] # Make this one higher rank + gather_nd = array_ops.gather_nd(params, indices) + with self.assertRaisesOpError( + r"flat indices\[2, :\] = \[1\] does not index into param " + r"\(shape: \[1,3\]\)"): + gather_nd.eval() + + def _disabledTestBadIndicesWithSlicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [[0, 1, 2]] indices = [[[0], [0], [1]]] # Make this one higher rank diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py index a2fcd751df..033fa95935 100644 --- a/tensorflow/python/kernel_tests/gather_op_test.py +++ b/tensorflow/python/kernel_tests/gather_op_test.py @@ -27,7 +27,8 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.platform import test -_TEST_TYPES = (dtypes.float32, dtypes.complex64, dtypes.complex128) +_TEST_TYPES = (dtypes.int64, dtypes.float32, + dtypes.complex64, dtypes.complex128) class GatherTest(test.TestCase): @@ -122,6 +123,9 @@ class GatherTest(test.TestCase): gather, [tf_params, tf_indices, tf_axis], gather_grad) self.assertEqual(indices_grad, None) self.assertEqual(axis_grad, None) + if dtype.is_integer: + self.assertEqual(params_grad, None) + continue # For axis 0, we 
are able to create an efficient IndexedSlices for # the gradient. if axis == 0: @@ -177,7 +181,19 @@ class GatherTest(test.TestCase): gather_t = array_ops.gather(params, indices, axis=axis) self.assertEqual(None, gather_t.shape) - def testBadIndices(self): + def testBadIndicesCPU(self): + with self.test_session(use_gpu=False): + params = [[0, 1, 2], [3, 4, 5]] + with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): + array_ops.gather(params, [[7]], axis=0).eval() + with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 3\)"): + array_ops.gather(params, [[7]], axis=1).eval() + + def _disabledTestBadIndicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [[0, 1, 2], [3, 4, 5]] with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index a9b55854f1..795aa67248 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -362,6 +362,33 @@ class UniformUnitScalingInitializationTest(test.TestCase): dtype=dtypes.string) +class VarianceScalingInitializationTest(test.TestCase): + + def testNormalDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer(distribution='normal') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + def testUniformDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. 
/ shape[0] + init = init_ops.variance_scaling_initializer(distribution='uniform') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + # TODO(vrv): move to sequence_ops_test? class RangeTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index a0c372db7d..e95c729715 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -947,7 +947,7 @@ class PoolingTest(test.TestCase): output_sizes, x_init_value=x_init_value, delta=1e-2) - print("%s gradient error = " % func_name, err) + tf_logging.info("%s gradient error = " % func_name, err) self.assertLess(err, err_tolerance) def _ConstructAndTestSecondGradient(self, @@ -1024,7 +1024,7 @@ class PoolingTest(test.TestCase): input_sizes, x_init_value=x_init_value, delta=1e-2) - print("%s second-order gradient error = " % func_name, err) + tf_logging.info("%s second-order gradient error = " % func_name, err) self.assertLess(err, err_tolerance) def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu): diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index 677253946e..253e43920b 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -19,6 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import gc import re import numpy as np @@ -434,13 +435,29 @@ class PyFuncTest(test.TestCase): # ----- Tests shared by py_func and eager_py_func ----- def testCleanup(self): - for _ in xrange(1000): - g = ops.Graph() - with g.as_default(): - c = constant_op.constant([1.], dtypes.float32) - _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) - _ = 
script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) - self.assertLess(script_ops._py_funcs.size(), 100) + # Delete everything created by previous tests to avoid side effects. + ops.reset_default_graph() + gc.collect() + initial_size = script_ops._py_funcs.size() + # Encapsulate the graph generation, so locals can be deleted. + def make_graphs(): + for _ in xrange(1000): + g = ops.Graph() + with g.as_default(): + c = constant_op.constant([1.], dtypes.float32) + _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) + _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) + # These ops have a reference to 'c' which has a reference to the graph. + # Checks if the functions are being deleted though the graph is referenced from them. + # (see #18292) + _ = script_ops.py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) + _ = script_ops.eager_py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) + + # Call garbage collector to enforce deletion. + make_graphs() + ops.reset_default_graph() + gc.collect() + self.assertEqual(initial_size, script_ops._py_funcs.size()) # ----- Tests for eager_py_func ----- @test_util.run_in_graph_and_eager_modes() diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 79fe927b8a..faa4b49a8d 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -144,7 +144,9 @@ class StatefulScatterNdTest(test.TestCase): self.assertAllClose(new, ref_var.eval()) def _VariableRankTests(self, np_scatter, tf_scatter): - for vtype in (np.float32, np.float64, np.complex64, np.complex128): + for vtype in (np.int32, + np.float32, np.float64, + np.complex64, np.complex128): for itype in (np.int32, np.int64): self._VariableRankTest(np_scatter, tf_scatter, vtype, itype) @@ -221,7 +223,7 @@ class StatefulScatterNdTest(test.TestCase): # self._VariableRankTests(_NumpyDiv, 
state_ops.scatter_nd_div) def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter): - for vtype in (np.float32, np.float64): + for vtype in (np.int32, np.float32, np.float64): for itype in (np.int32, np.int64): self._VariableRankTest( np_scatter, tf_scatter, vtype, itype, repeat_indices=True) diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index c70a4ffce7..1a0fa744ae 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -159,7 +159,13 @@ class ScatterTest(test.TestCase): # Clips small values to avoid division by zero. def clip_small_values(x): - return 1e-4 * np.sign(x) if np.abs(x) < 1e-4 else x + threshold = 1e-4 + sign = np.sign(x) + + if isinstance(x, np.int32): + threshold = 1 + sign = np.random.choice([-1, 1]) + return threshold * sign if np.abs(x) < threshold else x updates = np.vectorize(clip_small_values)(updates) old = _AsType(np.random.randn(*((first_dim,) + extra_shape)), vtype) @@ -181,7 +187,11 @@ class ScatterTest(test.TestCase): tf_scatter, repeat_indices=False, updates_are_scalar=False): - for vtype in (np.float32, np.float64): + vtypes = [np.float32, np.float64] + if tf_scatter != state_ops.scatter_div: + vtypes.append(np.int32) + + for vtype in vtypes: for itype in (np.int32, np.int64): self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices, updates_are_scalar) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 794be096b7..a82855dfeb 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -264,7 +264,9 @@ class UnsortedSegmentTest(SegmentReductionHelper): # A subset of ops has been enabled for complex numbers self.complex_ops_list = [(np.add, None, - math_ops.unsorted_segment_sum, lambda t: 0)] + 
math_ops.unsorted_segment_sum, lambda t: 0), + (np.ndarray.__mul__, None, + math_ops.unsorted_segment_prod, lambda t: 1)] self.differentiable_dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] self.all_dtypes = (self.differentiable_dtypes + diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py index a5bd1b6ee0..e20daccb28 100644 --- a/tensorflow/python/kernel_tests/string_split_op_test.py +++ b/tensorflow/python/kernel_tests/string_split_op_test.py @@ -146,5 +146,101 @@ class StringSplitOpTest(test.TestCase): self.assertAllEqual(shape, [3, 1]) +class StringSplitV2OpTest(test.TestCase): + + def testSplitV2(self): + strings = ["pigs on the wing", "animals"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]]) + self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"]) + self.assertAllEqual(shape, [2, 4]) + + def testSplitV2MultiCharSeparator(self): + # Match Python behavior: + # >>> '1<>2<>3'.split('<>') + # ['1', '2', '3'] + # >>> "<><>4<>5<><>6<>".split("<>") + # ['', '', '4', '5', '', '6', ''] + strings = ["1<>2<>3", "<><>4<>5<><>6<>"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep="<>") + indices, values, shape = sess.run(tokens) + self.assertAllEqual( + indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6]]) + self.assertAllEqual(values, [b"1", b"2", b"3", + b"", b"", b"4", b"5", b"", b"6", b""]) + self.assertAllEqual(shape, [2, 7]) + + def testSplitV2SimpleSeparator(self): + # Match Python behavior: + # >>> '1,2,3'.split(',') + # ['1', '2', '3'] + # >>> '1,2,,3,'.split(',') + # ['1', '2', '', '3', ''] + strings = ["1,2,3", "4,5,,6,"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep=',') + 
indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2], [1, 3], [1, 4]]) + self.assertAllEqual(values, [b"1", b"2", b"3", + b"4", b"5", b"", b"6", b""]) + self.assertAllEqual(shape, [2, 5]) + + def testSplitV2EmptySeparator(self): + # Match Python behavior: + # >>> '1 2 3'.split() + # ['1', '2', '3'] + #>>> ' 1 2 3 '.split() + #['1', '2', '3'] + strings = ["1 2 3", " 4 5 6 "] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2]]) + self.assertAllEqual(values, [b"1", b"2", b"3", b"4", b"5", b"6"]) + self.assertAllEqual(shape, [2, 3]) + + def testSplitV2SimpleSeparatorMaxSplit(self): + # Match Python behavior: + # >>> '1,2,3'.split(',', maxsplit=1) + # ['1', '2,3'] + # >>> '4,5,,6,'.split(',', maxsplit=1) + # ['4', '5,,6,'] + strings = ["1,2,3", "4,5,,6,"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], + [1, 0], [1, 1]]) + self.assertAllEqual(values, [b"1", b"2,3", b"4", b"5,,6,"]) + self.assertAllEqual(shape, [2, 2]) + + def testSplitV2EmptySeparatorMaxSplit(self): + # Match Python behavior: + # '1 2 3'.split(maxsplit=1) + # ['1', '2 3'] + # >>> " 4 5 6 ".split(maxsplit=1) + # ['4', '5 6 '] + strings = ["1 2 3", " 4 5 6 "] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, maxsplit=1) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], + [1, 0], [1, 1]]) + self.assertAllEqual(values, [b"1", b"2 3", b"4", b"5 6 "]) + self.assertAllEqual(shape, [2, 2]) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 8129334703..fae63b1132 100644 --- 
a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -2619,6 +2619,10 @@ reverse.__doc__ = gen_array_ops.reverse_v2.__doc__ # pylint: disable=redefined-builtin @tf_export("reverse_sequence") +@deprecation.deprecated_args( + None, "seq_dim is deprecated, use seq_axis instead", "seq_dim") +@deprecation.deprecated_args( + None, "batch_dim is deprecated, use batch_axis instead", "batch_dim") def reverse_sequence(input, seq_lengths, seq_axis=None, diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py index 12afcd0b51..94c8d79335 100644 --- a/tensorflow/python/ops/gradient_checker.py +++ b/tensorflow/python/ops/gradient_checker.py @@ -283,10 +283,10 @@ def compute_gradient(x, numbers. For example, if `x` is complex with shape `[m]` and `y` is complex with shape `[n]`, each Jacobian `J` will have shape `[m * 2, n * 2]` with - J[:m, :n] = d(Re y)/d(Re x) - J[:m, n:] = d(Im y)/d(Re x) - J[m:, :n] = d(Re y)/d(Im x) - J[m:, n:] = d(Im y)/d(Im x) + J[::2, ::2] = d(Re y)/d(Re x) + J[::2, 1::2] = d(Im y)/d(Re x) + J[1::2, ::2] = d(Re y)/d(Im x) + J[1::2, 1::2] = d(Im y)/d(Im x) Args: x: a tensor or list of tensors diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index bdcf420980..f27d9224c1 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops @@ -258,14 +259,14 @@ def random_flip_up_down(image, seed=None): dimension, which is `height`. Otherwise output the image as-is. 
Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A 3-D tensor of the same type and shape as `image`. - + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. """ @@ -280,13 +281,14 @@ def random_flip_left_right(image, seed=None): second dimension, which is `width`. Otherwise output the image as-is. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. @@ -297,7 +299,8 @@ def random_flip_left_right(image, seed=None): def _random_flip(image, flip_index, seed, scope_name): """Randomly (50% chance) flip an image along axis `flip_index`. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. flip_index: The dimension along which to flip the image. Vertical: 0, Horizontal: 1 seed: A Python integer. Used to create a random seed. See @@ -306,22 +309,37 @@ def _random_flip(image, flip_index, seed, scope_name): scope_name: Name of the scope in which the ops are added. Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. 
""" with ops.name_scope(None, scope_name, [image]) as scope: image = ops.convert_to_tensor(image, name='image') - image = _Assert3DImage(image) - uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) - mirror_cond = math_ops.less(uniform_random, .5) - result = control_flow_ops.cond( - mirror_cond, - lambda: array_ops.reverse(image, [flip_index]), - lambda: image, - name=scope) - return fix_image_flip_shape(image, result) + image = _AssertAtLeast3DImage(image) + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) + mirror_cond = math_ops.less(uniform_random, .5) + result = control_flow_ops.cond( + mirror_cond, + lambda: array_ops.reverse(image, [flip_index]), + lambda: image, + name=scope + ) + return fix_image_flip_shape(image, result) + elif shape.ndims == 4: + uniform_random = random_ops.random_uniform( + [array_ops.shape(image)[0]], 0, 1.0, seed=seed + ) + mirror_cond = math_ops.less(uniform_random, .5) + return array_ops.where( + mirror_cond, + image, + functional_ops.map_fn(lambda x: array_ops.reverse(x, [flip_index]), image, dtype=image.dtype) + ) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') @tf_export('image.flip_left_right') @@ -1634,13 +1652,13 @@ def is_jpeg(contents, name=None): @tf_export('image.decode_image') -def decode_image(contents, channels=None, name=None): +def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the - appropriate operation to convert the input bytes `string` into a `Tensor` of - type `uint8`. + appropriate operation to convert the input bytes `string` into a `Tensor` + of type `dtype`. 
Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D @@ -1652,10 +1670,11 @@ def decode_image(contents, channels=None, name=None): contents: 0-D `string`. The encoded image bytes. channels: An optional `int`. Defaults to `0`. Number of color channels for the decoded image. + dtype: The desired DType of the returned `Tensor`. name: A name for the operation (optional) Returns: - `Tensor` with type `uint8` with shape `[height, width, num_channels]` for + `Tensor` with type `dtype` and shape `[height, width, num_channels]` for BMP, JPEG, and PNG images and shape `[num_frames, height, width, 3]` for GIF images. @@ -1679,7 +1698,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding BMP images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_decode, assert_channels]): - return gen_image_ops.decode_bmp(contents) + return convert_image_dtype(gen_image_ops.decode_bmp(contents), dtype) def _gif(): # Create assert to make sure that channels is not set to 1 @@ -1692,7 +1711,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_gif(contents) + return convert_image_dtype(gen_image_ops.decode_gif(contents), dtype) def check_gif(): # Create assert op to check that bytes are GIF decodable @@ -1701,7 +1720,11 @@ def decode_image(contents, channels=None, name=None): def _png(): """Decodes a PNG image.""" - return gen_image_ops.decode_png(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_png(contents, channels, + dtype=dtypes.uint8 + if dtype == dtypes.uint8 + else dtypes.uint16), dtype) def check_png(): 
"""Checks if an image is PNG.""" @@ -1717,7 +1740,8 @@ def decode_image(contents, channels=None, name=None): 'images') assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_jpeg(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_jpeg(contents, channels), dtype) # Decode normal JPEG images (start with \xff\xd8\xff\xe0) # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1). @@ -1878,7 +1902,7 @@ def sample_distorted_bounding_box(image_size, width / height within this range. area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The cropped area of the image must contain a fraction of the - supplied image within in this range. + supplied image within this range. max_attempts: An optional `int`. Defaults to `100`. Number of attempts at generating a cropped region of the image of the specified constraints. After `max_attempts` failures, return the diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 45499dcce0..2a6ab26e96 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -533,6 +533,37 @@ class FlipImageBenchmark(test.Benchmark): iters=benchmark_rounds, wall_time=step_time) + def _benchmarkBatchedRandomFlipLeftRight(self, device, cpu_count): + image_shape = [16, 299, 299, 3] + warmup_rounds = 100 + benchmark_rounds = 1000 + config = config_pb2.ConfigProto() + if cpu_count is not None: + config.inter_op_parallelism_threads = 1 + config.intra_op_parallelism_threads = cpu_count + with session.Session("", graph=ops.Graph(), config=config) as sess: + with ops.device(device): + inputs = variables.Variable( + random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, + trainable=False, + dtype=dtypes.float32) + run_op = image_ops.random_flip_left_right(inputs) + sess.run(variables.global_variables_initializer()) + for i in 
xrange(warmup_rounds + benchmark_rounds): + if i == warmup_rounds: + start = time.time() + sess.run(run_op) + end = time.time() + step_time = (end - start) / benchmark_rounds + tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all") + print("benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s step_time: " + "%.2f us" % + (tag, step_time * 1e6)) + self.report_benchmark( + name="benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s" % (tag), + iters=benchmark_rounds, + wall_time=step_time) + def benchmarkFlipLeftRightCpu1(self): self._benchmarkFlipLeftRight("/cpu:0", 1) @@ -551,6 +582,15 @@ class FlipImageBenchmark(test.Benchmark): def benchmarkRandomFlipLeftRightGpu(self): self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None) + def benchmarkBatchedRandomFlipLeftRightCpu1(self): + self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", 1) + + def benchmarkBatchedRandomFlipLeftRightCpuAll(self): + self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", None) + + def benchmarkBatchedRandomFlipLeftRightGpu(self): + self._benchmarkBatchedRandomFlipLeftRight(test.gpu_device_name(), None) + class AdjustHueBenchmark(test.Benchmark): @@ -987,7 +1027,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf) + y = image_ops.random_flip_left_right(x_tf, seed=seed) self.assertTrue(y.op.name.startswith("random_flip_left_right")) count_flipped = 0 @@ -1008,6 +1048,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) + def testRandomFlipLeftRightWithBatch(self): + batch_size = 16 + seed = 42 + + # create single item of test data + x_np_raw = np.array( + [[1, 2, 3], [1, 2, 3]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + y_np_raw = np.array( + [[3, 2, 1], [3, 2, 1]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + + # create 
batched test data + x_np = np.vstack([x_np_raw for _ in range(batch_size)]) + y_np = np.vstack([y_np_raw for _ in range(batch_size)]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_left_right(x_tf, seed=seed) + self.assertTrue(y.op.name.startswith("random_flip_left_right")) + + count_flipped = 0 + count_unflipped = 0 + for _ in range(100): + y_tf = y.eval() + + # check every element of the batch + for i in range(batch_size): + if y_tf[i][0][0] == 1: + self.assertAllEqual(y_tf[i], x_np[i]) + count_unflipped += 1 + else: + self.assertAllEqual(y_tf[i], y_np[i]) + count_flipped += 1 + + # 100 trials, each containing batch_size elements + # Mean: 50 * batch_size + # Std Dev: ~5 * sqrt(batch_size) + # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) + # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 + six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) + self.assertGreaterEqual(count_flipped, six_sigma) + self.assertGreaterEqual(count_unflipped, six_sigma) + def testInvolutionUpDown(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1057,9 +1141,11 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) + seed = 42 + with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf, seed=42) + y = image_ops.random_flip_up_down(x_tf, seed=seed) self.assertTrue(y.op.name.startswith("random_flip_up_down")) count_flipped = 0 count_unflipped = 0 @@ -1079,6 +1165,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) + def testRandomFlipUpDownWithBatch(self): + batch_size = 16 + seed = 42 + + # create single item of test 
data + x_np_raw = np.array( + [[1, 2, 3], [4, 5, 6]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + y_np_raw = np.array( + [[4, 5, 6], [1, 2, 3]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + + # create batched test data + x_np = np.vstack([x_np_raw for _ in range(batch_size)]) + y_np = np.vstack([y_np_raw for _ in range(batch_size)]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_up_down(x_tf, seed=seed) + self.assertTrue(y.op.name.startswith("random_flip_up_down")) + + count_flipped = 0 + count_unflipped = 0 + for _ in range(100): + y_tf = y.eval() + + # check every element of the batch + for i in range(batch_size): + if y_tf[i][0][0] == 1: + self.assertAllEqual(y_tf[i], x_np[i]) + count_unflipped += 1 + else: + self.assertAllEqual(y_tf[i], y_np[i]) + count_flipped += 1 + + # 100 trials, each containing batch_size elements + # Mean: 50 * batch_size + # Std Dev: ~5 * sqrt(batch_size) + # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) + # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 + six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) + self.assertGreaterEqual(count_flipped, six_sigma) + self.assertGreaterEqual(count_unflipped, six_sigma) + def testInvolutionTranspose(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1156,6 +1286,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): #Ops that support 4D input for op in [ image_ops.flip_left_right, image_ops.flip_up_down, + image_ops.random_flip_left_right, image_ops.random_flip_up_down, image_ops.transpose_image, image_ops.rot90 ]: transformed_unknown_dims_4 = op(p_unknown_dims_4) @@ -1166,14 +1297,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): "must be at least three-dimensional"): op(p_wrong_rank) - for op in [ - image_ops.random_flip_left_right, - image_ops.random_flip_up_down, - ]: - with self.assertRaisesRegexp(ValueError, "must be 
three-dimensional"): - op(p_wrong_rank) - - def testRot90GroupOrder(self): image = np.arange(24, dtype=np.uint8).reshape([2, 4, 3]) with self.test_session(use_gpu=True): @@ -1208,41 +1331,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_np = np.rot90(image, k=k, axes=(1, 2)) self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) -class RandomFlipTest(test_util.TensorFlowTestCase): - - def testRandomLeftRight(self): - x_np = np.array([0, 1], dtype=np.uint8).reshape([1, 2, 1]) - num_iterations = 500 - - hist = [0, 0] - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf) - for _ in xrange(num_iterations): - y_np = y.eval().flatten()[0] - hist[y_np] += 1 - - # Ensure that each entry is observed within 4 standard deviations. - four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) - self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) - - def testRandomUpDown(self): - x_np = np.array([0, 1], dtype=np.uint8).reshape([2, 1, 1]) - num_iterations = 500 - - hist = [0, 0] - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf) - for _ in xrange(num_iterations): - y_np = y.eval().flatten()[0] - hist[y_np] += 1 - - # Ensure that each entry is observed within 4 standard deviations. 
- four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) - self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) - - class AdjustContrastTest(test_util.TensorFlowTestCase): def _testContrast(self, x_np, y_np, contrast_factor): @@ -3880,5 +3968,88 @@ class SobelEdgesTest(test_util.TensorFlowTestCase): self.assertAllClose(expected_batch, actual_sobel) +class DecodeImageTest(test_util.TensorFlowTestCase): + + def testJpegUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.uint16) + image1 = 
image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testJpegFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 
2df230d470..724fcc39cd 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -467,7 +467,8 @@ class VarianceScaling(Initializer): else: scale /= max(1., (fan_in + fan_out) / 2.) if self.distribution == "normal": - stddev = math.sqrt(scale) + # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) + stddev = math.sqrt(scale) / .87962566103423978 return random_ops.truncated_normal( shape, 0.0, stddev, dtype, seed=self.seed) else: diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index 222b8ebc9d..8276047cb6 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -35,8 +35,9 @@ from tensorflow.python.util.tf_export import tf_export # Assert and Print are special symbols in python, so we must -# use an upper-case version of them. -@tf_export("Print") +# have an upper-case version of them. For users with Python 3 or Python 2.7 +# with `from __future__ import print_function`, we also allow lowercase. +@tf_export("Print", "print") def Print(input_, data, message=None, first_n=None, summarize=None, name=None): """Prints a list of tensors. diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e40481f3a7..466d0dadc8 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -125,8 +125,8 @@ def abs(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` or `SparseTensor` of type `float32`, `float64`, `int32`, - `int64`, `complex64` or `complex128`. + x: A `Tensor` or `SparseTensor` of type `float16`, `float32`, `float64`, + `int32`, `int64`, `complex64` or `complex128`. name: A name for the operation (optional). Returns: @@ -430,10 +430,10 @@ def pow(x, y, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, - or `complex128`. 
- y: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, - or `complex128`. + x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, + `complex64`, or `complex128`. + y: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, + `complex64`, or `complex128`. name: A name for the operation (optional). Returns: @@ -600,7 +600,7 @@ def round(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float32` or `float64`. + x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, or `int64`. name: A name for the operation (optional). Returns: @@ -1257,7 +1257,7 @@ def reduce_sum(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1397,7 +1397,7 @@ def reduce_mean(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1469,7 +1469,7 @@ def reduce_prod(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1519,7 +1519,7 @@ def reduce_min(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1568,7 +1568,7 @@ def reduce_max(input_tensor, entry in `axis`. 
If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1617,7 +1617,7 @@ def reduce_all(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1675,7 +1675,7 @@ def reduce_any(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 783d485892..f47f38e29e 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -621,7 +621,7 @@ def normalize_moments(counts, mean_ss, variance_ss, shift, name=None): """Calculate the mean and variance of based on the sufficient statistics. Args: - counts: A `Tensor` containing a the total count of the data (one value). + counts: A `Tensor` containing the total count of the data (one value). mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly shifted) sum of the elements to average over. variance_ss: A `Tensor` containing the variance sufficient statistics: the @@ -689,6 +689,9 @@ def moments( # Compute true mean while keeping the dims for proper broadcasting. 
mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean") # sample variance, not unbiased variance + # Note: stop_gradient does not change the gradient that gets + # backpropagated to the mean from the variance calculation, + # because that gradient is zero variance = math_ops.reduce_mean( math_ops.squared_difference(y, array_ops.stop_gradient(mean)), axes, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a0b55eb077..0c2f5b06c4 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1596,12 +1596,12 @@ def leaky_relu(features, alpha=0.2, name=None): Returns: The activation value. """ - with ops.name_scope(name, "LeakyRelu", [features, alpha]): + with ops.name_scope(name, "LeakyRelu", [features, alpha]) as name: features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") - return math_ops.maximum(alpha * features, features) + return math_ops.maximum(alpha * features, features, name=name) def _flatten_outer_dims(logits): diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index 46a5f4fae6..035b4735af 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -962,6 +962,16 @@ class LeakyReluTest(test_lib.TestCase): self.assertAllClose( outputs, [-0.4, -0.2, 0.0, 1.0, 2.0], rtol=tol, atol=tol) + def testName(self): + np_values = np.array([-2, -1, 0, 1, 2], dtype=np.float64) + outputs_with_name_set = nn_ops.leaky_relu( + constant_op.constant(np_values), + name='test_relu_op') + self.assertEqual(outputs_with_name_set.name, 'test_relu_op:0') + outputs_without_name_set = nn_ops.leaky_relu( + constant_op.constant(np_values)) + self.assertEqual(outputs_without_name_set.name, 'LeakyRelu:0') + class SwishTest(test_lib.TestCase): diff --git a/tensorflow/python/ops/script_ops.py 
b/tensorflow/python/ops/script_ops.py index f8676ccb5f..219562de5d 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -23,6 +23,7 @@ import threading # Used by py_util.cc to get tracebacks. import traceback # pylint: disable=unused-import +import weakref import numpy as np import six @@ -129,11 +130,14 @@ class FuncRegistry(object): def __init__(self): self._lock = threading.Lock() self._unique_id = 0 # GUARDED_BY(self._lock) - self._funcs = {} + # Only store weakrefs to the funtions. The strong reference is stored in + # the graph. + self._funcs = weakref.WeakValueDictionary() def insert(self, func): """Registers `func` and returns a unique token for this entry.""" token = self._next_unique_token() + # Store a weakref to the function self._funcs[token] = func return token @@ -186,7 +190,7 @@ class FuncRegistry(object): Raises: ValueError: if no function is registered for `token`. """ - func = self._funcs[token] + func = self._funcs.get(token, None) if func is None: raise ValueError("callback %s is not found" % token) if isinstance(func, EagerFunc): @@ -228,19 +232,6 @@ _py_funcs = FuncRegistry() pywrap_tensorflow.InitializePyTrampoline(_py_funcs) -class CleanupFunc(object): - """A helper class to remove a registered function from _py_funcs.""" - - def __init__(self, token): - self._token = token - - def __del__(self): - if _py_funcs is not None: - # If _py_funcs is None, the program is most likely in shutdown, and the - # _py_funcs object has been destroyed already. - _py_funcs.remove(self._token) - - def _internal_py_func(func, inp, Tout, @@ -270,17 +261,15 @@ def _internal_py_func(func, # bound to that of the outer graph instead. graph = graph._outer_graph - cleanup = CleanupFunc(token) - # TODO(zhifengc): Consider adding a Graph method to collect # `cleanup` objects in one of its member. 
- if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"): - graph._cleanup_py_funcs_used_in_graph = [] + if not hasattr(graph, "_py_funcs_used_in_graph"): + graph._py_funcs_used_in_graph = [] - # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph - # will be destroyed and their __del__ will remove the 'token' from - # the funcs registry. - graph._cleanup_py_funcs_used_in_graph.append(cleanup) + # Store a reference to the function in the graph to ensure it stays alive + # as long as the graph lives. When the graph is destroyed, the function + # is left to the garbage collector for destruction as well. + graph._py_funcs_used_in_graph.append(func) # pylint: enable=protected-access if eager: diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 0130233746..c3b16a7bd5 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -84,6 +84,8 @@ def _convert_to_sparse_tensors(sp_inputs): # pylint: disable=protected-access @tf_export("sparse_concat") +@deprecation.deprecated_args( + None, "concat_dim is deprecated, use axis instead", "concat_dim") def sparse_concat(axis, sp_inputs, name=None, @@ -597,6 +599,8 @@ class KeywordRequired(object): @tf_export("sparse_split") +@deprecation.deprecated_args( + None, "split_dim is deprecated, use axis instead", "split_dim") def sparse_split(keyword_required=KeywordRequired(), sp_input=None, num_split=None, diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index ae79c01949..0280c89c10 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -91,6 +91,59 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv shape.set_shape([2]) return sparse_tensor.SparseTensor(indices, values, shape) +@tf_export("strings.split") +def string_split_v2(source, sep=None, maxsplit=-1): + """Split elements of `source` based on `sep` into a `SparseTensor`. 
+ + Let N be the size of source (typically N will be the batch size). Split each + element of `source` based on `sep` and return a `SparseTensor` + containing the split tokens. Empty tokens are ignored. + + For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', + then the output will be + + st.indices = [0, 0; + 0, 1; + 1, 0; + 1, 1; + 1, 2] + st.shape = [2, 3] + st.values = ['hello', 'world', 'a', 'b', 'c'] + + If `sep` is given, consecutive delimiters are not grouped together and are + deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and + sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty + string, consecutive whitespace are regarded as a single separator, and the + result will contain no empty strings at the startor end if the string has + leading or trailing whitespace. + + Note that the above mentioned behavior matches python's str.split. + + Args: + source: `1-D` string `Tensor`, the strings to split. + sep: `0-D` string `Tensor`, the delimiter character. + maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result. + + Raises: + ValueError: If sep is not a string. + + Returns: + A `SparseTensor` of rank `2`, the strings split according to the delimiter. + The first column of the indices corresponds to the row in `source` and the + second column corresponds to the index of the split component in this row. 
+ """ + if sep is None: + sep = '' + sep = ops.convert_to_tensor(sep, dtype=dtypes.string) + source = ops.convert_to_tensor(source, dtype=dtypes.string) + + indices, values, shape = gen_string_ops.string_split_v2( + source, sep=sep, maxsplit=maxsplit) + indices.set_shape([None, 2]) + values.set_shape([None]) + shape.set_shape([2]) + return sparse_tensor.SparseTensor(indices, values, shape) + def _reduce_join_reduction_dims(x, axis, reduction_indices): """Returns range(rank(x) - 1, 0, -1) if reduction_indices is None.""" diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index f49e2d314d..47414c28af 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1786,6 +1786,23 @@ class variable_scope(object): assert v.name == "foo/bar/v:0" ``` + Simple example of how to reenter a premade variable scope safely: + + ```python + with tf.variable_scope("foo") as vs: + pass + + # Re-enter the variable scope. + with tf.variable_scope(vs, + auxiliary_name_scope=False) as vs1: + # Restore the original name_scope. + with tf.name_scope(vs1.original_name_scope): + v = tf.get_variable("v", [1]) + assert v.name == "foo/v:0" + c = tf.constant([1], name="c") + assert c.name == "foo/c:0" + ``` + Basic example of sharing a variable AUTO_REUSE: ```python @@ -1924,7 +1941,9 @@ class variable_scope(object): (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. auxiliary_name_scope: If `True`, we create an auxiliary name scope with - the scope. If `False`, we don't touch name scope. + the scope. If `False`, we don't create it. Note that the argument is + not inherited, and it only takes effect for once when creating. You + should only use it for re-entering a premade variable scope. Returns: A scope that can be captured and reused. 
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py index 00de044505..00de044505 100755..100644 --- a/tensorflow/python/tools/import_pb_to_tensorboard.py +++ b/tensorflow/python/tools/import_pb_to_tensorboard.py diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 522965990b..b59f8e1f98 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1719,7 +1719,7 @@ def tf_py_build_info_genrule(): name="py_build_info_gen", outs=["platform/build_info.py"], cmd= - "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), + "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), local=1, tools=[clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],) diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index bca9fa49eb..671b7e387e 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -41,7 +41,11 @@ _GENERATED_FILE_HEADER = """# This file is MACHINE GENERATED! Do not edit. # Generated by: tensorflow/tools/api/generator/create_python_api.py script. 
\"\"\"%s \"\"\" + +from __future__ import print_function + """ +_GENERATED_FILE_FOOTER = "\n\ndel print_function\n" class SymbolExposedTwiceError(Exception): @@ -149,6 +153,7 @@ class _ModuleInitCodeBuilder(object): _names_with_underscore = [%s] __all__ = [_s for _s in dir() if not _s.startswith('_')] __all__.extend([_s for _s in _names_with_underscore]) +__all__.remove('print_function') ''' % underscore_names_str return module_text_map @@ -333,7 +338,8 @@ def create_api_files( if module or not root_init_template: contents = ( _GENERATED_FILE_HEADER % - get_module_docstring(module, package, api_name) + text) + get_module_docstring(module, package, api_name) + + text + _GENERATED_FILE_FOOTER) else: # Read base init file with open(root_init_template, 'r') as root_init_template_file: diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index 5bb3b3c444..10171b3d60 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -58,7 +58,7 @@ tf_module { } member_method { name: "decode_image" - argspec: "args=[\'contents\', \'channels\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'uint8\'>\", \'None\'], " } member_method { name: "decode_jpeg" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index dc2bd40096..3051c4437e 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1533,6 +1533,10 @@ tf_module { argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { + name: "print" + argspec: "args=[\'input_\', \'data\', \'message\', \'first_n\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', 
\'None\', \'None\'], " + } + member_method { name: "py_func" argspec: "args=[\'func\', \'inp\', \'Tout\', \'stateful\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt index a3fbe95bba..b641c39feb 100644 --- a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt @@ -4,4 +4,8 @@ tf_module { name: "regex_full_match" argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "split" + argspec: "args=[\'source\', \'sep\', \'maxsplit\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], " + } } diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 5fa75e1d61..883bb93647 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -322,6 +322,10 @@ create_activate_virtualenv_and_install_tensorflow() { pip install -v ${PIP_FLAGS} ${WHL_PATH} || \ die "pip install (forcing to reinstall tensorflow) FAILED" echo "Successfully installed pip package ${TF_WHEEL_PATH}" + + # Force downgrade setuptools. 
+ pip install --upgrade setuptools==39.1.0 + } ################################################################################ diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index d4bf546d40..b216e3549f 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -40,7 +40,7 @@ if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then ADDUSER_OPTS="--force-badname" fi -getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 072dd6ab99..1f0fd0387a 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -134,6 +134,12 @@ if [[ $? != "0" ]]; then die "ERROR: docker build failed. Dockerfile is at ${DOCKERFILE_PATH}" fi +# If caller wants the with_the_same_user script to allow bad usernames, +# pass the var to the docker environment +if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then + CI_BUILD_USER_FORCE_BADNAME_ENV="-e CI_BUILD_USER_FORCE_BADNAME=yes" +fi + # Run the command inside the container. echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..." 
mkdir -p ${WORKSPACE}/bazel-ci_build-cache @@ -148,6 +154,7 @@ ${DOCKER_BINARY} run --rm --pid=host \ -e "CI_BUILD_GROUP=$(id -g -n)" \ -e "CI_BUILD_GID=$(id -g)" \ -e "CI_TENSORFLOW_SUBMODULE_PATH=${CI_TENSORFLOW_SUBMODULE_PATH}" \ + ${CI_BUILD_USER_FORCE_BADNAME_ENV} \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index 420d390d2b..148526492d 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -32,7 +32,8 @@ import shutil import tempfile import zipfile -TF_NIGHTLY_REGEX = r"(.+)tf_nightly(|_gpu)-(\d\.\d\.\d.dev[\d]{0,8})-(.+)\.whl" +TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)-(\d\.[\d]{1,2}" + "\.\d.dev[\d]{0,8})-(.+)\.whl") BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 60290df833..88f1d04193 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -115,3 +115,7 @@ pip2 install keras_applications==1.0.2 pip3 install keras_applications==1.0.2 pip2 install keras_preprocessing==1.0.1 pip3 install keras_preprocessing==1.0.1 + +# Install last working version of setuptools. 
+pip2 install --upgrade setuptools==39.1.0 +pip3 install --upgrade setuptools==39.1.0 diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index edb9d4b929..acd69ef346 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -39,7 +39,6 @@ if [[ -z $pip35_version ]]; then fi set -e -pip3.5 install --upgrade setuptools pip3.5 install --upgrade pip pip3.5 install --upgrade virtualenv @@ -86,4 +85,7 @@ pip3.5 install --upgrade termcolor pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 +# Install last working version of setuptools. +pip3.5 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index 5635977731..323b30f48e 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -49,7 +49,6 @@ cd Python-3.6.1 make altinstall ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 -pip3 install --upgrade setuptools pip3 install --upgrade pip pip3 install --upgrade virtualenv @@ -101,4 +100,8 @@ pip3 install --upgrade termcolor # Keras pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 + +# Install last working version of setuptools. 
+pip3 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh new file mode 100755 index 0000000000..10a09a415a --- /dev/null +++ b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Usage: basic-mkl-test.sh + +# Helper function to traverse directories up until the given file is found. +function upsearch () { + test / == "$PWD" && return || \ + test -e "$1" && echo "$PWD" && return || \ + cd .. && upsearch "$1" +} + +# Set up WORKSPACE. 
+WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}" + +BUILD_TAG=mkl-ci-test CI_BUILD_USER_FORCE_BADNAME=yes ${WORKSPACE}/tensorflow/tools/ci_build/ci_build.sh cpu tensorflow/tools/ci_build/linux/cpu/run_mkl.sh diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 1bd1852ffc..b8bce57c87 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -79,6 +79,7 @@ if [[ $1 == "PI_ONE" ]]; then --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" + WHEEL_ARCH=linux_armv6l else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR @@ -86,6 +87,7 @@ else --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' + WHEEL_ARCH=linux_armv7l echo "Building for the Pi Two/Three, with NEON acceleration" fi @@ -100,6 +102,8 @@ bazel build -c opt ${PI_COPTS} \ --copt=-fomit-frame-pointer --cpu=armeabi \ --crosstool_top=@local_config_arm_compiler//:toolchain \ --verbose_failures \ + //tensorflow:libtensorflow.so \ + //tensorflow:libtensorflow_framework.so \ //tensorflow/tools/benchmark:benchmark_model \ //tensorflow/tools/pip_package:build_pip_package @@ -112,10 +116,12 @@ BDIST_OPTS="--universal" \ bazel-bin/tensorflow/tools/pip_package/build_pip_package "${OUTDIR}" OLD_FN=$(ls "${OUTDIR}" | grep -m 1 \.whl) -SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-any.whl/; print' +SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-'${WHEEL_ARCH}'.whl/; print' NEW_FN=$(echo "${OLD_FN}" | perl -ne "${SUB}") mv "${OUTDIR}/${OLD_FN}" "${OUTDIR}/${NEW_FN}" cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${OUTDIR}" +cp bazel-bin/tensorflow/libtensorflow.so "${OUTDIR}" +cp bazel-bin/tensorflow/libtensorflow_framework.so 
"${OUTDIR}" echo "Output can be found here:" find "${OUTDIR}" diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl index 47539b2423..f8f63e276c 100644 --- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl +++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl @@ -31,7 +31,11 @@ def _def_file_filter_configure_impl(repository_ctx): vc_path = find_vc_path(repository_ctx) if vc_path == "visual-studio-not-found": auto_configure_fail("Visual C++ build tools not found on your machine") - undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\") + + undname = find_msvc_tool(repository_ctx, vc_path, "undname.exe") + if undname == None: + auto_configure_fail("Couldn't find undname.exe under %s, please check your VC installation and set BAZEL_VC environment variable correctly." % vc_path) + undname_bin_path = undname.replace("\\", "\\\\") repository_ctx.template( "def_file_filter.py", diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 06c2b997cb..b0114721bd 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -64,9 +64,6 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below -DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" - # Parse input arguments LEAVE_CONTAINER_RUNNING=0 MODEL_NAME="" @@ -77,8 +74,7 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} - echo "use default whl file location" + echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." 
fi while true; do @@ -131,7 +127,11 @@ echo "Building in temporary directory: ${BUILD_DIR}" cp -r ${DIR}/* "${BUILD_DIR}"/ || \ die "Failed to copy files to ${BUILD_DIR}" -if [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then +# Download whl file into the build context directory. +if [[ -z "${WHL_FILE_LOCATION}" ]]; then + pip2 download --no-deps tf-nightly + cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl +elif [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then # Download whl file into the build context directory. wget -P "${BUILD_DIR}" "${WHL_FILE_LOCATION}" || \ die "Failed to download tensorflow whl file from URL: ${WHL_FILE_LOCATION}" diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh index 935535312d..e188c88c8f 100755 --- a/tensorflow/tools/dist_test/remote_test.sh +++ b/tensorflow/tools/dist_test/remote_test.sh @@ -108,7 +108,7 @@ fi # Parse command-line arguments. WHL_URL=${1} if [[ -z "${WHL_URL}" ]]; then - die "whl URL is not specified" + echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." fi # Create docker build context directory. @@ -121,8 +121,13 @@ cp -r ${DIR}/* ${BUILD_DIR}/ || \ die "Failed to copy files to ${BUILD_DIR}" # Download whl file into the build context directory. -wget -P "${BUILD_DIR}" ${WHL_URL} || \ - die "Failed to download tensorflow whl file from URL: ${WHL_URL}" +if [[ -z "${WHL_URL}" ]]; then + pip2 download --no-deps tf-nightly + cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl +else + wget -P "${BUILD_DIR}" ${WHL_URL} || \ + die "Failed to download tensorflow whl file from URL: ${WHL_URL}" +fi # Build docker image for test. 
docker build ${NO_CACHE_FLAG} \ diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 406d134699..57a491255e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -76,7 +76,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index a6cd44ced1..6796ad70e5 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison<clayne.b.robison@intel.com>" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.8 +ARG TF_BRANCH=r1.9 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 2fe47f3356..204b5b4dba 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -13,8 +13,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusparse-dev-9-0 \ curl \ git \ - libcudnn7=7.0.5.15-1+cuda9.0 \ - libcudnn7-dev=7.0.5.15-1+cuda9.0 \ + libcudnn7=7.1.4.18-1+cuda9.0 \ + libcudnn7-dev=7.1.4.18-1+cuda9.0 \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ @@ -85,7 +85,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . 
+RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index bff4a20392..9197651ff4 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusolver-9-0 \ cuda-cusparse-9-0 \ curl \ - libcudnn7=7.0.5.15-1+cuda9.0 \ + libcudnn7=7.1.4.18-1+cuda9.0 \ libfreetype6-dev \ libhdf5-serial-dev \ libpng12-dev \ diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 5910f0625e..620fef9363 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -61,6 +61,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/contrib/autograph/core:core", "//tensorflow/contrib/autograph/impl:impl", "//tensorflow/contrib/autograph/lang:lang", + "//tensorflow/contrib/autograph/operators:operators", "//tensorflow/contrib/autograph/pyct:pyct", "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", "//tensorflow/contrib/boosted_trees:boosted_trees_pip", diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index 0c4065bc77..f7e42ce536 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -41,51 +41,15 @@ function is_windows() { fi } -function main() { +function prepare_src() { if [ $# -lt 1 ] ; then echo "No destination dir provided" exit 1 fi - DEST=$(real_path $1) - TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX) - - PKG_NAME_FLAG="" - GPU_BUILD=0 - NIGHTLY_BUILD=0 - PROJECT_NAME="" - while true; do - if [[ "$1" == "--nightly_flag" ]]; then - NIGHTLY_BUILD=1 - elif [[ "$1" == "--gpu" ]]; then - GPU_BUILD=1 - elif [[ "$1" == "--gpudirect" ]]; then - PKG_NAME_FLAG="--project_name 
tensorflow_gpudirect" - elif [[ "$1" == "--project_name" ]]; then - shift - if [[ -z "$1" ]]; then - break - fi - PROJECT_NAME="$1" - fi - shift - - if [[ -z "$1" ]]; then - break - fi - done - - if [[ -n ${PROJECT_NAME} ]]; then - PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" - elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly_gpu" - elif [[ ${NIGHTLY_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly" - elif [[ ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpu" - fi - - echo $(date) : "=== Using tmpdir: ${TMPDIR}" + TMPDIR="$1" + mkdir -p "$TMPDIR" + echo $(date) : "=== Preparing sources in dir: ${TMPDIR}" if [ ! -d bazel-bin/tensorflow ]; then echo "Could not find bazel-bin. Did you run from the root of the build tree?" @@ -155,17 +119,28 @@ function main() { # over so user defined ops can be compiled. mkdir -p ${TMPDIR}/google mkdir -p ${TMPDIR}/third_party - pushd ${RUNFILES%org_tensorflow} + pushd ${RUNFILES%org_tensorflow} > /dev/null for header in $(find protobuf_archive -name \*.h); do mkdir -p "${TMPDIR}/google/$(dirname ${header})" cp "$header" "${TMPDIR}/google/$(dirname ${header})/" done - popd + popd > /dev/null cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party cp tensorflow/tools/pip_package/MANIFEST.in ${TMPDIR} cp tensorflow/tools/pip_package/README ${TMPDIR} cp tensorflow/tools/pip_package/setup.py ${TMPDIR} +} + +function build_wheel() { + if [ $# -lt 2 ] ; then + echo "No src and dest dir provided" + exit 1 + fi + + TMPDIR="$1" + DEST="$2" + PKG_NAME_FLAG="$3" # Before we leave the top-level directory, make sure we know how to # call python. 
@@ -173,15 +148,110 @@ function main() { source tools/python_bin_path.sh fi - pushd ${TMPDIR} + pushd ${TMPDIR} > /dev/null rm -f MANIFEST echo $(date) : "=== Building wheel" "${PYTHON_BIN_PATH:-python}" setup.py bdist_wheel ${PKG_NAME_FLAG} >/dev/null mkdir -p ${DEST} cp dist/* ${DEST} - popd - rm -rf ${TMPDIR} + popd > /dev/null echo $(date) : "=== Output wheel file is in: ${DEST}" } +function usage() { + echo "Usage:" + echo "$0 [--src srcdir] [--dst dstdir] [options]" + echo "$0 dstdir [options]" + echo "" + echo " --src prepare sources in srcdir" + echo " will use temporary dir if not specified" + echo "" + echo " --dst build wheel in dstdir" + echo " if dstdir is not set do not build, only prepare sources" + echo "" + echo " Options:" + echo " --project_name <name> set project name to name" + echo " --gpu build tensorflow_gpu" + echo " --gpudirect build tensorflow_gpudirect" + echo " --nightly_flag build tensorflow nightly" + echo "" + exit 1 +} + +function main() { + PKG_NAME_FLAG="" + PROJECT_NAME="" + GPU_BUILD=0 + NIGHTLY_BUILD=0 + SRCDIR="" + DSTDIR="" + CLEANSRC=1 + while true; do + if [[ "$1" == "--help" ]]; then + usage + exit 1 + elif [[ "$1" == "--nightly_flag" ]]; then + NIGHTLY_BUILD=1 + elif [[ "$1" == "--gpu" ]]; then + GPU_BUILD=1 + elif [[ "$1" == "--gpudirect" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpudirect" + elif [[ "$1" == "--project_name" ]]; then + shift + if [[ -z "$1" ]]; then + break + fi + PROJECT_NAME="$1" + elif [[ "$1" == "--src" ]]; then + shift + SRCDIR="$(real_path $1)" + CLEANSRC=0 + elif [[ "$1" == "--dst" ]]; then + shift + DSTDIR="$(real_path $1)" + else + DSTDIR="$(real_path $1)" + fi + shift + + if [[ -z "$1" ]]; then + break + fi + done + + if [[ -z "$DSTDIR" ]] && [[ -z "$SRCDIR" ]]; then + echo "No destination dir provided" + usage + exit 1 + fi + + if [[ -z "$SRCDIR" ]]; then + # make temp srcdir if none set + SRCDIR="$(mktemp -d -t tmp.XXXXXXXXXX)" + fi + + prepare_src "$SRCDIR" + + if [[ -z "$DSTDIR" 
]]; then + # only want to prepare sources + exit + fi + + if [[ -n ${PROJECT_NAME} ]]; then + PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" + elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly_gpu" + elif [[ ${NIGHTLY_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly" + elif [[ ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpu" + fi + + build_wheel "$SRCDIR" "$DSTDIR" "$PKG_NAME_FLAG" + + if [[ $CLEANSRC -ne 0 ]]; then + rm -rf "${TMPDIR}" + fi +} + main "$@" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d25a9e77b1..97f625e7e9 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.8.0' +_VERSION = '1.9.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -54,6 +54,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', + 'setuptools <= 39.1.0', 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', ] diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc index 29add6d5ea..15d7c70281 100644 --- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc +++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc @@ -814,6 +814,9 @@ void Generator::Generate(const FileDescriptor& fd) { // Add header to cc file. 
SetOutput(&cc_); Print("// GENERATED FILE - DO NOT MODIFY"); + Print(); + Print("#include <algorithm>"); // for `std::stable_sort()` + Print(); headers = {GetProtoTextHeaderName(fd, true /* impl */)}; AddHeadersToCurrentSection(headers); Print(); diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/tools/quantization/quantize_graph_test.py index df71840b64..92bb5127da 100644 --- a/tensorflow/tools/quantization/quantize_graph_test.py +++ b/tensorflow/tools/quantization/quantize_graph_test.py @@ -119,8 +119,8 @@ def are_tensors_near(a, b, tolerance): flat_a = a.flatten() flat_b = b.flatten() if len(flat_a) != len(flat_b): - print("Tensors are different sizes: " + str(len(flat_a)) + " vs " + str( - len(flat_b))) + tf_logging.info("Tensors are different sizes: " + str(len(flat_a)) + " vs " + + str(len(flat_b))) return False value_count = len(flat_a) how_many_different = 0 @@ -140,10 +140,10 @@ def are_tensors_near(a, b, tolerance): if how_many_different == 0: return True else: - print("Tensors have {0} different values ({1}%), with mean difference" - " {2} and mean absolute difference {3}".format( - how_many_different, proportion_different * 100, mean_difference, - mean_abs_difference)) + tf_logging.info("Tensors have {0} different values ({1}%), with mean" + " difference {2} and mean absolute difference {3}".format( + how_many_different, proportion_different * 100, + mean_difference, mean_abs_difference)) return False diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index 9c45359ee1..c030575109 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -89,7 +89,6 @@ import shutil from six import text_type from google.cloud import datastore -from six import text_type def is_real_file(dirpath, fname): diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index dbec66216a..4f3df570a5 100644 --- 
a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -50,31 +50,31 @@ def tf_workspace(path_prefix="", tf_repo_name=""): mkl_repository( name = "mkl_linux", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz" ], - sha256 = "74844bd77294742bf2396ff040369d1aa4cdd9e826fcd38cf8398ae83564d146", - strip_prefix = "mklml_lnx_2018.0.2.20180127", + sha256 = "d2305244fdc9b87db7426ed4496e87a4b3977ad3374d73b8000e8b7a5b7aa725", + strip_prefix = "mklml_lnx_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_windows", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip" ], - sha256 = "d8fbf0faa0684bffa3548005d05fe5cfe56ff9dbc0e15e7612d7ac01055a6ded", - strip_prefix = "mklml_win_2018.0.2.20180127", + sha256 = "a584a5bf1c8d2ad70b90d12b52652030e9a338217719064fdb84b7ad0d693694", + strip_prefix = "mklml_win_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_darwin", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz" + 
"https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz" ], - sha256 = "aa740d71e14562bfea56e6829e6dc186e7487cbcf6748a88dec73826b7ec1943", - strip_prefix = "mklml_mac_2018.0.2.20180127", + sha256 = "094e3dfd61c816136dc8d12a45cc611ce26c5f4828176a3644cd0b0efa15a25b", + strip_prefix = "mklml_mac_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) @@ -85,11 +85,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.13.tar.gz", - "https://github.com/intel/mkl-dnn/archive/v0.13.tar.gz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.14.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v0.14.tar.gz", ], - sha256 = "d2cfd93a70cfe86ebe054477c530c9b5c1218b70f75856eb6d1956c68ee89e8f", - strip_prefix = "mkl-dnn-0.13", + sha256 = "efebc53882856afec86457a2da644693f5d59c68772d41d640d6b60a8efc4eb0", + strip_prefix = "mkl-dnn-0.14", build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) @@ -187,11 +187,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "highwayhash", urls = [ - "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", - "https://github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", + "http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", + "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", ], - sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", - strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", + sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37", + strip_prefix 
= "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968", build_file = clean_dep("//third_party:highwayhash.BUILD"), ) |