From f2b17b22e12bd743b66945070f338f70b5fa3332 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 25 Sep 2018 21:54:20 -0700 Subject: Allow subslicing Tensors with a single dimension. PiperOrigin-RevId: 214553359 --- tensorflow/core/framework/tensor.cc | 2 +- tensorflow/core/framework/tensor.h | 2 +- tensorflow/core/framework/tensor_test.cc | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 3df677675e..1dea6da911 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -813,7 +813,7 @@ Tensor Tensor::Slice(int64 start, int64 limit) const { } Tensor Tensor::SubSlice(int64 index) const { - CHECK_GE(dims(), 2); // Crash ok. + CHECK_GE(dims(), 1); // Crash ok. CHECK_LE(0, index); // Crash ok. int64 dim0_size = shape_.dim_size(0); CHECK_LE(index, dim0_size); // Crash ok. diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 8a0c70fef2..d0f9eb56e2 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -219,7 +219,7 @@ class Tensor { /// must check the returned tensor's alignment before calling certain /// methods that have alignment requirement (e.g., `flat()`, `tensor()`). /// - /// REQUIRES: `dims()` >= 2 + /// REQUIRES: `dims()` >= 1 /// REQUIRES: `0 <= dim0_start < dim_size(0)` Tensor SubSlice(int64 index) const; diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index 0bfa53e6c5..c596604143 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -1246,6 +1246,9 @@ TEST(Tensor, SubSlice_Basic) { EXPECT_EQ(&tx(5, j, k), &ty(j, k)); } } + Tensor z = y.SubSlice(3).SubSlice(31); + auto tz = z.unaligned_flat(); + EXPECT_EQ(*tz.data(), 5.0); } { // Test unaligned access via a SubSlice. -- cgit v1.2.3 From c63d21b0bfc534b6377b332e9d2ba2abbdb7e0eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 25 Sep 2018 22:57:54 -0700 Subject: Adds a build flag to enable MKL (mkl_enabled=true). 
PiperOrigin-RevId: 214557082 --- tensorflow/contrib/cmake/CMakeLists.txt | 2 +- .../direct_session_with_tracking_alloc_test.cc | 8 ++--- .../core/common_runtime/mkl_cpu_allocator_test.cc | 4 +-- .../core/common_runtime/threadpool_device.cc | 5 ++- tensorflow/core/graph/mkl_layout_pass.cc | 4 +++ tensorflow/core/graph/mkl_layout_pass_test.cc | 4 +-- tensorflow/core/graph/mkl_tfconversion_pass.cc | 2 ++ .../core/graph/mkl_tfconversion_pass_test.cc | 4 +-- tensorflow/core/kernels/batch_matmul_op_complex.cc | 10 ++++-- tensorflow/core/kernels/batch_matmul_op_real.cc | 9 +++-- tensorflow/core/kernels/cwise_ops_common.cc | 4 +-- tensorflow/core/kernels/gather_nd_op_cpu_impl.h | 6 ++-- tensorflow/core/kernels/matmul_op.cc | 8 ++--- tensorflow/core/kernels/mkl_batch_matmul_op.cc | 2 ++ tensorflow/core/kernels/mkl_matmul_op.cc | 6 ++-- tensorflow/core/kernels/slice_op.cc | 14 +++----- tensorflow/core/kernels/transpose_op.cc | 10 +++--- tensorflow/core/util/port.cc | 4 +-- tensorflow/tensorflow.bzl | 3 ++ third_party/mkl/BUILD | 23 ++++++++---- third_party/mkl/build_defs.bzl | 41 ++++++++++++++++------ third_party/mkl_dnn/BUILD | 6 ++-- third_party/mkl_dnn/build_defs.bzl | 2 +- tools/bazel.rc | 5 +-- 24 files changed, 117 insertions(+), 69 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index ebcabb4223..c6d6f04168 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -353,7 +353,7 @@ endif() # MKL Support if (tensorflow_ENABLE_MKL_SUPPORT) - add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) + add_definitions(-DINTEL_MKL -DEIGEN_USE_VML -DENABLE_MKL) include(mkl) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkl_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkl_copy_shared_to_destination) diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc index 2ed4f69f90..efd6185f8b 100644 --- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc +++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc @@ -108,7 +108,7 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) { EXPECT_EQ(2, shape.dim(0).size()); EXPECT_EQ(1, shape.dim(1).size()); if (node->name() == y->name()) { -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) // if MKL is used, it goes through various additional // graph rewrite pass. In TF, everytime a graph pass // happens, "constant" nodes are allocated @@ -120,13 +120,13 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) { EXPECT_EQ(29, cm->AllocationId(node, 0)); #else EXPECT_EQ(21, cm->AllocationId(node, 0)); -#endif +#endif // INTEL_MKL && ENABLE_MKL } else { -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) EXPECT_EQ(30, cm->AllocationId(node, 0)); #else EXPECT_EQ(22, cm->AllocationId(node, 0)); -#endif +#endif // INTEL_MKL && ENABLE_MKL } } EXPECT_LE(0, cm->MaxExecutionTime(node)); diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc index a67411cd2e..e08ab57638 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" @@ -50,4 +50,4 @@ TEST(MKLBFCAllocatorTest, TestMaxLimit) { } // namespace tensorflow -#endif // INTEL_MKL +#endif // INTEL_MKL && ENABLE_MKL diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index 0fbc20b34b..8587d1783a 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -113,8 +113,11 @@ class MklCPUAllocatorFactory : public AllocatorFactory { } }; +#ifdef ENABLE_MKL REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory); +#endif // ENABLE_MKL + } // namespace -#endif +#endif // INTEL_MKL } // namespace tensorflow diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index f5b0105862..37b88f1728 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -977,7 +977,9 @@ std::vector MklLayoutRewritePass::cinfo_; // nodes. Do not change the ordering of the Mkl passes. const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup = OptimizationPassRegistry::POST_PARTITIONING; +#ifdef ENABLE_MKL REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass); +#endif // ENABLE_MKL ////////////////////////////////////////////////////////////////////////// // Helper functions for creating new node @@ -3150,7 +3152,9 @@ MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_; // nodes. Do not change the ordering of the Mkl passes. const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup = OptimizationPassRegistry::POST_PARTITIONING; +#ifdef ENABLE_MKL REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass); +#endif // ENABLE_MKL ////////////////////////////////////////////////////////////////////////// // Helper functions for creating new node diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index e8bac847e5..f42a4ee98b 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) #include "tensorflow/core/graph/mkl_layout_pass.h" #include "tensorflow/core/graph/mkl_graph_util.h" @@ -3586,4 +3586,4 @@ BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000); } // namespace tensorflow -#endif /* INTEL_MKL */ +#endif // INTEL_MKL && ENABLE_MKL diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index b67a321fc1..8c5ffd71a3 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -133,7 +133,9 @@ class MklToTfConversionPass : public GraphOptimizationPass { // complete picture of inputs and outputs of the nodes in the graphs. 
const OptimizationPassRegistry::Grouping kMklTfConvPassGroup = OptimizationPassRegistry::POST_PARTITIONING; +#ifdef ENABLE_MKL REGISTER_OPTIMIZATION(kMklTfConvPassGroup, 2, MklToTfConversionPass); +#endif // ENABLE_MKL Status MklToTfConversionPass::InsertConversionNodeOnEdge( std::unique_ptr* g, Edge* e) { diff --git a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc index ebcb6de551..319437a801 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) #include "tensorflow/core/graph/mkl_tfconversion_pass.h" #include "tensorflow/core/graph/mkl_graph_util.h" @@ -304,4 +304,4 @@ BENCHMARK(BM_RunMklToTfConversionPass)->Arg(1000)->Arg(10000); } // namespace } // namespace tensorflow -#endif /* INTEL_MKL */ +#endif // INTEL_MKL && ENABLE_MKL diff --git a/tensorflow/core/kernels/batch_matmul_op_complex.cc b/tensorflow/core/kernels/batch_matmul_op_complex.cc index 54c45bfe63..f48bd0c318 100644 --- a/tensorflow/core/kernels/batch_matmul_op_complex.cc +++ b/tensorflow/core/kernels/batch_matmul_op_complex.cc @@ -17,14 +17,18 @@ limitations under the License. namespace tensorflow { -#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY) +// MKL_ML registers its own complex64/128 kernels in mkl_batch_matmul_op.cc +// if defined(INTEL_MKL) && !defined(INTEL_MKL_DNN_ONLY) && defined(ENABLE_MKL). +// Anything else (the complement) should register the TF ones. +// (MKL-DNN doesn't implement these kernels either.) +#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY) || !defined(ENABLE_MKL) TF_CALL_complex64(REGISTER_BATCH_MATMUL_CPU); TF_CALL_complex128(REGISTER_BATCH_MATMUL_CPU); -#endif +#endif // !INTEL_MKL || INTEL_MKL_DNN_ONLY || !ENABLE_MKL #if GOOGLE_CUDA TF_CALL_complex64(REGISTER_BATCH_MATMUL_GPU); TF_CALL_complex128(REGISTER_BATCH_MATMUL_GPU); -#endif +#endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc index 584b507c70..25ae795d8e 100644 --- a/tensorflow/core/kernels/batch_matmul_op_real.cc +++ b/tensorflow/core/kernels/batch_matmul_op_real.cc @@ -21,10 +21,15 @@ limitations under the License. namespace tensorflow { -#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY) +// MKL_ML registers its own float and double kernels in mkl_batch_matmul_op.cc +// if defined(INTEL_MKL) && !defined(INTEL_MKL_DNN_ONLY) && defined(ENABLE_MKL). +// Anything else (the complement) should register the TF ones. +// (MKL-DNN doesn't implement these kernels either.) 
+#if !defined(INTEL_MKL) || defined(INTEL_MKL_DNN_ONLY) || !defined(ENABLE_MKL) TF_CALL_float(REGISTER_BATCH_MATMUL_CPU); TF_CALL_double(REGISTER_BATCH_MATMUL_CPU); -#endif +#endif // !INTEL_MKL || INTEL_MKL_DNN_ONLY || !ENABLE_MKL + TF_CALL_half(REGISTER_BATCH_MATMUL_CPU); TF_CALL_int32(REGISTER_BATCH_MATMUL_CPU); diff --git a/tensorflow/core/kernels/cwise_ops_common.cc b/tensorflow/core/kernels/cwise_ops_common.cc index 980edffceb..8ad3b4d1fc 100644 --- a/tensorflow/core/kernels/cwise_ops_common.cc +++ b/tensorflow/core/kernels/cwise_ops_common.cc @@ -20,9 +20,9 @@ namespace tensorflow { BinaryOpShared::BinaryOpShared(OpKernelConstruction* ctx, DataType out, DataType in) : OpKernel(ctx) { -#ifndef INTEL_MKL +#if !defined(INTEL_MKL) || !defined(ENABLE_MKL) OP_REQUIRES_OK(ctx, ctx->MatchSignature({in, in}, {out})); -#endif +#endif // !INTEL_MKL || !ENABLE_MKL } void BinaryOpShared::SetUnimplementedError(OpKernelContext* ctx) { diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h index 277ee2be02..1c78de253e 100644 --- a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h +++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h @@ -114,7 +114,7 @@ struct GatherNdSlice { generator::GatherNdSliceGenerator gather_nd_generator( slice_size, Tindices, Tparams, Tout, &error_loc); -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) // Eigen implementation below is not highly performant. gather_nd_generator // does not seem to be called in parallel, leading to very poor performance. // Additionally, since it uses scalar (Tscratch) to invoke 'generate', it @@ -126,12 +126,12 @@ struct GatherNdSlice { const Eigen::array loc{i}; gather_nd_generator(loc); } -#else // INTEL_MKL +#else // INTEL_MKL && ENABLE_MKL Tscratch.device(d) = Tscratch.reshape(reshape_dims) .broadcast(broadcast_dims) .generate(gather_nd_generator) .sum(); -#endif +#endif // INTEL_MKL && ENABLE_MKL // error_loc() returns -1 if there's no out-of-bounds index, // otherwise it returns the location of an OOB index in Tindices. 
diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc index 79967aab38..4ad390a411 100644 --- a/tensorflow/core/kernels/matmul_op.cc +++ b/tensorflow/core/kernels/matmul_op.cc @@ -578,7 +578,7 @@ struct MatMulFunctor { .Label("cublas"), \ MatMulOp) -#if defined(INTEL_MKL) +#if defined(INTEL_MKL) && defined(ENABLE_MKL) // MKL does not support half, bfloat16 and int32 types for // matrix-multiplication, so register the kernel to use default Eigen based @@ -606,9 +606,9 @@ TF_CALL_double(REGISTER_CPU); TF_CALL_complex64(REGISTER_CPU_EIGEN); TF_CALL_complex128(REGISTER_CPU_EIGEN); TF_CALL_double(REGISTER_CPU_EIGEN); -#endif +#endif // INTEL_MKL_DNN_ONLY -#else // INTEL MKL +#else // INTEL_MKL && ENABLE_MKL TF_CALL_float(REGISTER_CPU); TF_CALL_double(REGISTER_CPU); TF_CALL_half(REGISTER_CPU); @@ -616,7 +616,7 @@ TF_CALL_bfloat16(REGISTER_CPU); TF_CALL_int32(REGISTER_CPU); TF_CALL_complex64(REGISTER_CPU); TF_CALL_complex128(REGISTER_CPU); -#endif +#endif // INTEL_MKL && ENABLE_MKL #if GOOGLE_CUDA TF_CALL_float(REGISTER_GPU); diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index 0841395dc3..bc135de11e 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -223,10 +223,12 @@ class BatchMatMulMkl : public OpKernel { Name("BatchMatMul").Device(DEVICE_CPU).TypeConstraint("T"), \ BatchMatMulMkl) +#ifdef ENABLE_MKL TF_CALL_float(REGISTER_BATCH_MATMUL_MKL); TF_CALL_double(REGISTER_BATCH_MATMUL_MKL); TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL); TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL); +#endif // ENABLE_MKL } // end namespace tensorflow #endif diff --git a/tensorflow/core/kernels/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl_matmul_op.cc index 077d62ce32..f4788f4851 100644 --- a/tensorflow/core/kernels/mkl_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_matmul_op.cc @@ -217,7 +217,7 @@ class MklMatMulOp : public OpKernel { reinterpret_cast(b), ldb, &beta, reinterpret_cast(c), ldc); } -#endif +#endif // !INTEL_MKL_DNN_ONLY }; #define REGISTER_CPU(T) \ @@ -225,6 +225,7 @@ class MklMatMulOp : public OpKernel { Name("MatMul").Device(DEVICE_CPU).TypeConstraint("T"), \ MklMatMulOp); +#ifdef ENABLE_MKL // TODO(inteltf) Consider template specialization when adding/removing // additional types TF_CALL_float(REGISTER_CPU); @@ -233,7 +234,8 @@ TF_CALL_float(REGISTER_CPU); TF_CALL_double(REGISTER_CPU); TF_CALL_complex64(REGISTER_CPU); TF_CALL_complex128(REGISTER_CPU); -#endif +#endif // !INTEL_MKL_DNN_ONLY +#endif // ENABLE_MKL } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 77594479cb..97f77e45b6 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -411,7 +411,7 @@ class MklSliceOp : public OpKernel { context->input(0).tensor(), indices, sizes); } }; -#endif +#endif // INTEL_MKL // Forward declarations of the functor specializations for declared in the // sharded source files. 
@@ -440,18 +440,14 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N); #undef DECLARE_CPU_SPEC } // namespace functor -#ifndef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) #define REGISTER_SLICE(type) \ REGISTER_KERNEL_BUILDER(Name("Slice") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ .HostMemory("begin") \ .HostMemory("size"), \ - SliceOp) - -TF_CALL_POD_STRING_TYPES(REGISTER_SLICE); -TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE); -#undef REGISTER_SLICE + MklSliceOp) #else #define REGISTER_SLICE(type) \ REGISTER_KERNEL_BUILDER(Name("Slice") \ @@ -459,12 +455,12 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE); .TypeConstraint("T") \ .HostMemory("begin") \ .HostMemory("size"), \ - MklSliceOp) + SliceOp) +#endif // INTEL_MKL && ENABLE_MKL TF_CALL_POD_STRING_TYPES(REGISTER_SLICE); TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE); #undef REGISTER_SLICE -#endif // INTEL_MKL #if GOOGLE_CUDA // Forward declarations of the functor specializations for GPU. diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc index 0f0f65c5a3..48e392c070 100644 --- a/tensorflow/core/kernels/transpose_op.cc +++ b/tensorflow/core/kernels/transpose_op.cc @@ -218,7 +218,7 @@ Status ConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx, perm, out); } -#if defined(INTEL_MKL) +#if defined(INTEL_MKL) && defined(ENABLE_MKL) #define REGISTER(T) \ REGISTER_KERNEL_BUILDER(Name("Transpose") \ .Device(DEVICE_CPU) \ @@ -230,11 +230,8 @@ Status ConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx, .TypeConstraint("T") \ .HostMemory("perm"), \ MklConjugateTransposeCpuOp); -TF_CALL_ALL_TYPES(REGISTER); -#undef REGISTER - -#else // INTEL_MKL +#else // INTEL_MKL && ENABLE_MKL #define REGISTER(T) \ REGISTER_KERNEL_BUILDER(Name("Transpose") \ .Device(DEVICE_CPU) \ @@ -246,9 +243,10 @@ TF_CALL_ALL_TYPES(REGISTER); .TypeConstraint("T") \ .HostMemory("perm"), \ ConjugateTransposeCpuOp); +#endif // INTEL_MKL && ENABLE_MKL + TF_CALL_ALL_TYPES(REGISTER) #undef REGISTER -#endif // INTEL_MKL #if GOOGLE_CUDA Status TransposeGpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in, diff --git a/tensorflow/core/util/port.cc b/tensorflow/core/util/port.cc index c081ceae57..e01058dff6 100644 --- a/tensorflow/core/util/port.cc +++ b/tensorflow/core/util/port.cc @@ -38,10 +38,10 @@ bool CudaSupportsHalfMatMulAndConv() { } bool IsMklEnabled() { -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && defined(ENABLE_MKL) return true; #else return false; -#endif +#endif // INTEL_MKL && ENABLE_MKL } } // end namespace tensorflow diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 7ddaf7806e..d6c75d675c 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -22,6 +22,7 @@ load( ) load( "//third_party/mkl:build_defs.bzl", + "if_enable_mkl", "if_mkl", "if_mkl_lnx_x64", "if_mkl_ml", @@ -237,6 +238,7 @@ def tf_copts(android_optimization_level_override = "-O2", is_external = False): if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + + if_enable_mkl(["-DENABLE_MKL"]) + if_ngraph(["-DINTEL_NGRAPH=1"]) + if_mkl_lnx_x64(["-fopenmp"]) + if_android_arm(["-mfpu=neon"]) + @@ -1082,6 +1084,7 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs) ]), copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + + if_enable_mkl(["-DENABLE_MKL"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"])), **kwargs ) diff --git a/third_party/mkl/BUILD 
b/third_party/mkl/BUILD index efff7fd51b..15a3e5cfa7 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -1,26 +1,26 @@ licenses(["notice"]) # 3-Clause BSD config_setting( - name = "using_mkl", + name = "build_with_mkl", define_values = { - "using_mkl": "true", + "build_with_mkl": "true", }, visibility = ["//visibility:public"], ) config_setting( - name = "using_mkl_ml_only", + name = "build_with_mkl_ml_only", define_values = { - "using_mkl": "true", - "using_mkl_ml_only": "true", + "build_with_mkl": "true", + "build_with_mkl_ml_only": "true", }, visibility = ["//visibility:public"], ) config_setting( - name = "using_mkl_lnx_x64", + name = "build_with_mkl_lnx_x64", define_values = { - "using_mkl": "true", + "build_with_mkl": "true", }, values = { "cpu": "k8", @@ -28,6 +28,15 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "enable_mkl", + define_values = { + "enable_mkl": "true", + "build_with_mkl": "true", + }, + visibility = ["//visibility:public"], +) + load( "//third_party/mkl:build_defs.bzl", "if_mkl", diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index b645c0fc5c..bb798e715a 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -1,9 +1,11 @@ # -*- Python -*- """Skylark macros for MKL. -if_mkl is a conditional to check if MKL is enabled or not. -if_mkl_ml is a conditional to check if MKL-ML is enabled. + +if_mkl is a conditional to check if we are building with MKL. +if_mkl_ml is a conditional to check if we are building with MKL-ML. if_mkl_ml_only is a conditional to check for MKL-ML-only (no MKL-DNN) mode. if_mkl_lnx_x64 is a conditional to check for MKL +if_enable_mkl is a conditional to check if building with MKL and MKL is enabled. mkl_repository is a repository rule for creating MKL repository rule that can be pointed to either a local folder, or download it from the internet. @@ -24,7 +26,7 @@ def if_mkl(if_true, if_false = []): a select evaluating to either if_true or if_false as appropriate. """ return select({ - str(Label("//third_party/mkl:using_mkl")): if_true, + str(Label("//third_party/mkl:build_with_mkl")): if_true, "//conditions:default": if_false, }) @@ -40,8 +42,8 @@ def if_mkl_ml(if_true, if_false = []): a select evaluating to either if_true or if_false as appropriate. """ return select({ - str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): if_false, - str(Label("//third_party/mkl:using_mkl")): if_true, + str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): if_false, + str(Label("//third_party/mkl:build_with_mkl")): if_true, "//conditions:default": if_false, }) @@ -56,12 +58,12 @@ def if_mkl_ml_only(if_true, if_false = []): a select evaluating to either if_true or if_false as appropriate. """ return select({ - str(Label("//third_party/mkl:using_mkl_ml_only")): if_true, + str(Label("//third_party/mkl:build_with_mkl_ml_only")): if_true, "//conditions:default": if_false, }) def if_mkl_lnx_x64(if_true, if_false = []): - """Shorthand to select() on if MKL is on and the target is Linux x86-64. + """Shorthand to select() if building with MKL and the target is Linux x86-64. Args: if_true: expression to evaluate if building with MKL is enabled and the @@ -73,7 +75,24 @@ def if_mkl_lnx_x64(if_true, if_false = []): a select evaluating to either if_true or if_false as appropriate. 
""" return select({ - str(Label("//third_party/mkl:using_mkl_lnx_x64")): if_true, + str(Label("//third_party/mkl:build_with_mkl_lnx_x64")): if_true, + "//conditions:default": if_false, + }) + +def if_enable_mkl(if_true, if_false = []): + """Shorthand to select() if we are building with MKL and MKL is enabled. + + This is only effective when built with MKL. + + Args: + if_true: expression to evaluate if building with MKL and MKL is enabled + if_false: expression to evaluate if building without MKL or MKL is not enabled. + + Returns: + A select evaluating to either if_true or if_false as appropriate. + """ + return select({ + "//third_party/mkl:enable_mkl": if_true, "//conditions:default": if_false, }) @@ -87,9 +106,9 @@ def mkl_deps(): inclusion in the deps attribute of rules. """ return select({ - str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): ["@mkl_dnn"], - str(Label("//third_party/mkl:using_mkl_ml_only")): ["//third_party/mkl:intel_binary_blob"], - str(Label("//third_party/mkl:using_mkl")): [ + str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): ["@mkl_dnn"], + str(Label("//third_party/mkl:build_with_mkl_ml_only")): ["//third_party/mkl:intel_binary_blob"], + str(Label("//third_party/mkl:build_with_mkl")): [ "//third_party/mkl:intel_binary_blob", "@mkl_dnn", ], diff --git a/third_party/mkl_dnn/BUILD b/third_party/mkl_dnn/BUILD index 3e567fa9fc..58ecda55e6 100644 --- a/third_party/mkl_dnn/BUILD +++ b/third_party/mkl_dnn/BUILD @@ -3,10 +3,10 @@ licenses(["notice"]) exports_files(["LICENSE"]) config_setting( - name = "using_mkl_dnn_only", + name = "build_with_mkl_dnn_only", define_values = { - "using_mkl": "true", - "using_mkl_dnn_only": "true", + "build_with_mkl": "true", + "build_with_mkl_dnn_only": "true", }, visibility = ["//visibility:public"], ) diff --git a/third_party/mkl_dnn/build_defs.bzl b/third_party/mkl_dnn/build_defs.bzl index 7ce2a7d9b0..6388f31971 100644 --- a/third_party/mkl_dnn/build_defs.bzl +++ b/third_party/mkl_dnn/build_defs.bzl @@ -8,6 +8,6 @@ def if_mkl_open_source_only(if_true, if_false = []): """ return select({ - str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): if_true, + str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): if_true, "//conditions:default": if_false, }) diff --git a/tools/bazel.rc b/tools/bazel.rc index ccf62629d1..6747c7e795 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -24,12 +24,13 @@ build --define framework_shared_object=true # Please note that MKL on MacOS or windows is still not supported. # If you would like to use a local MKL instead of downloading, please set the # environment variable "TF_MKL_ROOT" every time before build. -build:mkl --define=using_mkl=true +build:mkl --define=build_with_mkl=true --define=enable_mkl=true build:mkl -c opt # This config option is used to enable MKL-DNN open source library only, # without depending on MKL binary version. -build:mkl_open_source_only --define=using_mkl_dnn_only=true +build:mkl_open_source_only --define=build_with_mkl_dnn_only=true +build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true build:download_clang --crosstool_top=@local_config_download_clang//:toolchain build:download_clang --define=using_clang=true -- cgit v1.2.3 From ffa90fc521c6051addd50236872a4afaa45e0a49 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 26 Sep 2018 01:38:55 -0700 Subject: Fixes for building with CUDA on ppc64le. 
PiperOrigin-RevId: 214569615 --- configure.py | 188 +++++++++++++++++++----------------- third_party/gpus/cuda_configure.bzl | 2 + 2 files changed, 101 insertions(+), 89 deletions(-) diff --git a/configure.py b/configure.py index f0b9fada5e..1064f6a9d4 100644 --- a/configure.py +++ b/configure.py @@ -41,7 +41,6 @@ _DEFAULT_CUDA_PATH = '/usr/local/cuda' _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda' _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing ' 'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION) -_DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine() _TF_OPENCL_VERSION = '1.2' _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp' _DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include' @@ -54,6 +53,11 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc' _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME) _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE') +if platform.machine() == 'ppc64le': + _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/' +else: + _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine() + class UserInputError(Exception): pass @@ -153,14 +157,18 @@ def get_python_path(environ_cp, python_bin_path): if environ_cp.get('PYTHONPATH'): python_paths = environ_cp.get('PYTHONPATH').split(':') try: - library_paths = run_shell( - [python_bin_path, '-c', - 'import site; print("\\n".join(site.getsitepackages()))']).split('\n') + library_paths = run_shell([ + python_bin_path, '-c', + 'import site; print("\\n".join(site.getsitepackages()))' + ]).split('\n') except subprocess.CalledProcessError: - library_paths = [run_shell( - [python_bin_path, '-c', - 'from distutils.sysconfig import get_python_lib;' - 'print(get_python_lib())'])] + library_paths = [ + run_shell([ + python_bin_path, '-c', + 'from distutils.sysconfig import get_python_lib;' + 'print(get_python_lib())' + ]) + ] all_paths = set(python_paths + library_paths) @@ -187,8 +195,7 @@ def setup_python(environ_cp): environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path, default_python_bin_path) # Check if the path is valid - if os.path.isfile(python_bin_path) and os.access( - python_bin_path, os.X_OK): + if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK): break elif not os.path.exists(python_bin_path): print('Invalid python path: %s cannot be found.' % python_bin_path) @@ -230,8 +237,9 @@ def setup_python(environ_cp): environ_cp['PYTHON_BIN_PATH'] = python_bin_path # Write tools/python_bin_path.sh - with open(os.path.join( - _TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f: + with open( + os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), + 'w') as f: f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) @@ -250,7 +258,7 @@ def reset_tf_configure_bazelrc(workspace_path): continue f.write('%s\n' % l) if is_windows(): - tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/") + tf_bazelrc_path = _TF_BAZELRC.replace('\\', '/') else: tf_bazelrc_path = _TF_BAZELRC f.write('import %s\n' % tf_bazelrc_path) @@ -261,8 +269,8 @@ def cleanup_makefile(): These files could interfere with Bazel parsing. 
""" - makefile_download_dir = os.path.join( - _TF_WORKSPACE_ROOT, 'tensorflow', 'contrib', 'makefile', 'downloads') + makefile_download_dir = os.path.join(_TF_WORKSPACE_ROOT, 'tensorflow', + 'contrib', 'makefile', 'downloads') if os.path.isdir(makefile_download_dir): for root, _, filenames in os.walk(makefile_download_dir): for f in filenames: @@ -330,9 +338,8 @@ def get_var(environ_cp, 'Environment variable %s must be set as a boolean indicator.\n' 'The following are accepted as TRUE : %s.\n' 'The following are accepted as FALSE: %s.\n' - 'Current value is %s.' % ( - var_name, ', '.join(true_strings), ', '.join(false_strings), - var)) + 'Current value is %s.' % (var_name, ', '.join(true_strings), + ', '.join(false_strings), var)) while var is None: user_input_origin = get_input(question) @@ -355,8 +362,12 @@ def get_var(environ_cp, return var -def set_build_var(environ_cp, var_name, query_item, option_name, - enabled_by_default, bazel_config_name=None): +def set_build_var(environ_cp, + var_name, + query_item, + option_name, + enabled_by_default, + bazel_config_name=None): """Set if query_item will be enabled for the build. Ask user if query_item will be enabled. Default is used if no input is given. @@ -379,8 +390,8 @@ def set_build_var(environ_cp, var_name, query_item, option_name, elif bazel_config_name is not None: # TODO(mikecase): Migrate all users of configure.py to use --config Bazel # options and not to set build configs through environment variables. - write_to_bazelrc('build:%s --define %s=true' - % (bazel_config_name, option_name)) + write_to_bazelrc( + 'build:%s --define %s=true' % (bazel_config_name, option_name)) def set_action_env_var(environ_cp, @@ -447,7 +458,8 @@ def check_bazel_version(min_version): if which('bazel') is None: print('Cannot find bazel. Please install bazel.') sys.exit(0) - curr_version = run_shell(['bazel', '--batch', '--bazelrc=/dev/null', 'version']) + curr_version = run_shell( + ['bazel', '--batch', '--bazelrc=/dev/null', 'version']) for line in curr_version.split('\n'): if 'Build label: ' in line: @@ -499,6 +511,7 @@ def set_cc_opt_flags(environ_cp): write_to_bazelrc('build:opt --host_copt=-march=native') write_to_bazelrc('build:opt --define with_default_optimizations=true') + def set_tf_cuda_clang(environ_cp): """set TF_CUDA_CLANG action_env. @@ -581,16 +594,14 @@ def set_clang_cuda_compiler_path(environ_cp): clang_cuda_compiler_path) -def prompt_loop_or_load_from_env( - environ_cp, - var_name, - var_default, - ask_for_var, - check_success, - error_msg, - suppress_default_error=False, - n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS -): +def prompt_loop_or_load_from_env(environ_cp, + var_name, + var_default, + ask_for_var, + check_success, + error_msg, + suppress_default_error=False, + n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS): """Loop over user prompts for an ENV param until receiving a valid response. For the env param var_name, read from the environment or verify user input @@ -629,9 +640,7 @@ def prompt_loop_or_load_from_env( ) for _ in range(n_ask_attempts): - val = get_from_env_or_user_or_default(environ_cp, - var_name, - full_query, + val = get_from_env_or_user_or_default(environ_cp, var_name, full_query, default) if check_success(val): break @@ -639,9 +648,9 @@ def prompt_loop_or_load_from_env( print(error_msg % val) environ_cp[var_name] = '' else: - raise UserInputError('Invalid %s setting was provided %d times in a row. ' - 'Assuming to be a scripting mistake.' 
% - (var_name, n_ask_attempts)) + raise UserInputError( + 'Invalid %s setting was provided %d times in a row. ' + 'Assuming to be a scripting mistake.' % (var_name, n_ask_attempts)) environ_cp[var_name] = val return val @@ -650,8 +659,8 @@ def prompt_loop_or_load_from_env( def create_android_ndk_rule(environ_cp): """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule.""" if is_windows() or is_cygwin(): - default_ndk_path = cygpath('%s/Android/Sdk/ndk-bundle' % - environ_cp['APPDATA']) + default_ndk_path = cygpath( + '%s/Android/Sdk/ndk-bundle' % environ_cp['APPDATA']) elif is_macos(): default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] else: @@ -668,8 +677,7 @@ def create_android_ndk_rule(environ_cp): ask_for_var='Please specify the home path of the Android NDK to use.', check_success=valid_ndk_path, error_msg=('The path %s or its child file "source.properties" ' - 'does not exist.') - ) + 'does not exist.')) write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path) write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL', check_ndk_level(android_ndk_home_path)) @@ -703,9 +711,9 @@ def create_android_sdk_rule(environ_cp): api_levels = [x.replace('android-', '') for x in api_levels] def valid_api_level(api_level): - return os.path.exists(os.path.join(android_sdk_home_path, - 'platforms', - 'android-' + api_level)) + return os.path.exists( + os.path.join(android_sdk_home_path, 'platforms', + 'android-' + api_level)) android_api_level = prompt_loop_or_load_from_env( environ_cp, @@ -720,9 +728,8 @@ def create_android_sdk_rule(environ_cp): versions = sorted(os.listdir(build_tools)) def valid_build_tools(version): - return os.path.exists(os.path.join(android_sdk_home_path, - 'build-tools', - version)) + return os.path.exists( + os.path.join(android_sdk_home_path, 'build-tools', version)) android_build_tools_version = prompt_loop_or_load_from_env( environ_cp, @@ -736,10 +743,8 @@ def create_android_sdk_rule(environ_cp): write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION', android_build_tools_version) - write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', - android_api_level) - write_action_env_to_bazelrc('ANDROID_SDK_HOME', - android_sdk_home_path) + write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', android_api_level) + write_action_env_to_bazelrc('ANDROID_SDK_HOME', android_sdk_home_path) def check_ndk_level(android_ndk_home_path): @@ -798,6 +803,7 @@ def reformat_version_sequence(version_str, sequence_count): Args: version_str: String, the version string. sequence_count: int, an integer. + Returns: string, reformatted version string. 
""" @@ -841,12 +847,19 @@ def set_tf_cuda_version(environ_cp): if is_windows(): cuda_rt_lib_paths = ['lib/x64/cudart.lib'] elif is_linux(): - cuda_rt_lib_paths = ['%s/libcudart.so.%s' % (x, tf_cuda_version) - for x in ['lib64', 'lib/x86_64-linux-gnu']] + cuda_rt_lib_paths = [ + '%s/libcudart.so.%s' % (x, tf_cuda_version) for x in [ + 'lib64', + 'lib/powerpc64le-linux-gnu', + 'lib/x86_64-linux-gnu', + ] + ] elif is_macos(): cuda_rt_lib_paths = ['lib/libcudart.%s.dylib' % tf_cuda_version] - cuda_toolkit_paths_full = [os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths] + cuda_toolkit_paths_full = [ + os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths + ] if any([os.path.exists(x) for x in cuda_toolkit_paths_full]): break @@ -919,8 +932,8 @@ def set_tf_cudnn_version(environ_cp): cudnn_path_from_ldconfig) if cudnn_path_from_ldconfig: cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) - if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, - tf_cudnn_version)): + if os.path.exists( + '%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)): cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) break @@ -1166,6 +1179,7 @@ def get_native_cuda_compute_capabilities(environ_cp): Args: environ_cp: copy of the os.environ. + Returns: string of native cuda compute capabilities, separated by comma. """ @@ -1290,8 +1304,7 @@ def set_computecpp_toolkit_path(environ_cp): else: sycl_rt_lib_path = '' - sycl_rt_lib_path_full = os.path.join(toolkit_path, - sycl_rt_lib_path) + sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path) exists = os.path.exists(sycl_rt_lib_path_full) if not exists: print('Invalid SYCL %s library path. %s cannot be found' % @@ -1319,8 +1332,8 @@ def set_trisycl_include_dir(environ_cp): ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' 'include directory. (Use --config=sycl_trisycl ' 'when building with Bazel) ' - '[Default is %s]: ' - ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) + '[Default is %s]: ') % ( + _DEFAULT_TRISYCL_INCLUDE_DIR) while True: trisycl_include_dir = get_from_env_or_user_or_default( @@ -1329,13 +1342,12 @@ def set_trisycl_include_dir(environ_cp): if os.path.exists(trisycl_include_dir): break - print('Invalid triSYCL include directory, %s cannot be found' - % (trisycl_include_dir)) + print('Invalid triSYCL include directory, %s cannot be found' % + (trisycl_include_dir)) # Set TRISYCL_INCLUDE_DIR environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir - write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', - trisycl_include_dir) + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) def set_mpi_home(environ_cp): @@ -1345,8 +1357,9 @@ def set_mpi_home(environ_cp): default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home)) def valid_mpi_path(mpi_home): - exists = (os.path.exists(os.path.join(mpi_home, 'include')) and - os.path.exists(os.path.join(mpi_home, 'lib'))) + exists = ( + os.path.exists(os.path.join(mpi_home, 'include')) and + os.path.exists(os.path.join(mpi_home, 'lib'))) if not exists: print('Invalid path to the MPI Toolkit. 
%s or %s cannot be found' % (os.path.join(mpi_home, 'include'), @@ -1434,11 +1447,9 @@ def set_windows_build_flags(environ_cp): if get_var( environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline', - True, - ('Would you like to override eigen strong inline for some C++ ' - 'compilation to reduce the compilation time?'), - 'Eigen strong inline overridden.', - 'Not overriding eigen strong inline, ' + True, ('Would you like to override eigen strong inline for some C++ ' + 'compilation to reduce the compilation time?'), + 'Eigen strong inline overridden.', 'Not overriding eigen strong inline, ' 'some compilations could take more than 20 mins.'): # Due to a known MSVC compiler issue # https://github.com/tensorflow/tensorflow/issues/10521 @@ -1455,10 +1466,11 @@ def config_info_line(name, help_text): def main(): parser = argparse.ArgumentParser() - parser.add_argument("--workspace", - type=str, - default=_TF_WORKSPACE_ROOT, - help="The absolute path to your active Bazel workspace.") + parser.add_argument( + '--workspace', + type=str, + default=_TF_WORKSPACE_ROOT, + help='The absolute path to your active Bazel workspace.') args = parser.parse_args() # Make a copy of os.environ to be clear when functions and getting and setting @@ -1500,7 +1512,7 @@ def main(): # runtime to allow the Tensorflow testcases which compare numpy # results to Tensorflow results to succeed. if is_ppc64le(): - write_action_env_to_bazelrc("OMP_NUM_THREADS", 1) + write_action_env_to_bazelrc('OMP_NUM_THREADS', 1) set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', 'with_jemalloc', True) @@ -1514,12 +1526,12 @@ def main(): 'with_kafka_support', True, 'kafka') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') - set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', - False, 'gdr') + set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', False, + 'gdr') set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', False, 'verbs') - set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph', - 'with_ngraph_support', False, 'ngraph') + set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph', 'with_ngraph_support', + False, 'ngraph') set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': @@ -1585,13 +1597,10 @@ def main(): # Add a config option to build TensorFlow 2.0 API. 
write_to_bazelrc('build:v2 --define=tf_api_version=2') - if get_var( - environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', - False, - ('Would you like to interactively configure ./WORKSPACE for ' - 'Android builds?'), - 'Searching for NDK and SDK installations.', - 'Not configuring the WORKSPACE for Android builds.'): + if get_var(environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', False, + ('Would you like to interactively configure ./WORKSPACE for ' + 'Android builds?'), 'Searching for NDK and SDK installations.', + 'Not configuring the WORKSPACE for Android builds.'): create_android_ndk_rule(environ_cp) create_android_sdk_rule(environ_cp) @@ -1605,5 +1614,6 @@ def main(): config_info_line('mkl', 'Build with MKL support.') config_info_line('monolithic', 'Config for mostly static monolithic build.') + if __name__ == '__main__': main() diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 5648b1525a..f5fdd3a75e 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -48,6 +48,7 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"] CUDA_LIB_PATHS = [ "lib64/", "lib64/stubs/", + "lib/powerpc64le-linux-gnu/", "lib/x86_64-linux-gnu/", "lib/x64/", "lib/", @@ -70,6 +71,7 @@ CUPTI_HEADER_PATHS = [ # the other CUDA libraries but rather in a special extras/CUPTI directory. CUPTI_LIB_PATHS = [ "extras/CUPTI/lib64/", + "lib/powerpc64le-linux-gnu/", "lib/x86_64-linux-gnu/", "lib64/", "extras/CUPTI/libx64/", -- cgit v1.2.3 From 2770a03f79f3b544d40cf1b8ab0a5c971dd36f5f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 26 Sep 2018 02:26:08 -0700 Subject: compat: Update forward compatibility horizon to 2018-09-26 PiperOrigin-RevId: 214574383 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 74fe1fe35c..ce230731b0 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 25) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 26) @tf_export("compat.forward_compatible") -- cgit v1.2.3 From b4ae85234b4f626e4aaee1d2c531a6b534712dbb Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 26 Sep 2018 02:31:31 -0700 Subject: Automated rollback of commit 7229d08f0b25e24e6dd4833a94a27f404b27a350 PiperOrigin-RevId: 214575129 --- WORKSPACE | 20 ++------------------ tensorflow/python/keras/BUILD | 2 -- tensorflow/requirements.txt | 2 -- 3 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 tensorflow/requirements.txt diff --git a/WORKSPACE b/WORKSPACE index 11605871f3..17961829a6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -9,26 +9,10 @@ http_archive( "https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz", # 2018-04-13 ], ) -load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories") -closure_repositories() -http_archive( - name = "io_bazel_rules_python", - strip_prefix = "rules_python-8b5d0683a7d878b28fffe464779c8a53659fc645", - urls = [ - "https://github.com/bazelbuild/rules_python/archive/8b5d0683a7d878b28fffe464779c8a53659fc645.tar.gz", - ], -) -load("@io_bazel_rules_python//python:pip.bzl", "pip_repositories") -pip_repositories() 
+load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories") -load("@io_bazel_rules_python//python:pip.bzl", "pip_import") -pip_import( - name = "pip_deps", - requirements = "//tensorflow:requirements.txt", -) -load("@pip_deps//:requirements.bzl", "pip_install") -pip_install() +closure_repositories() # We must check the bazel version before trying to parse any other BUILD # files, in case the parsing of those build files depends on the bazel diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index ac011a2940..4a72c4b3f3 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -7,7 +7,6 @@ exports_files(["LICENSE"]) package(default_visibility = ["//visibility:public"]) -load("@pip_deps//:requirements.bzl", "requirement") load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_test") @@ -63,7 +62,6 @@ py_library( ":backend", ":engine", ":layers", - requirement("keras_applications"), "//tensorflow/python/saved_model", "//tensorflow/python:training", ], diff --git a/tensorflow/requirements.txt b/tensorflow/requirements.txt deleted file mode 100644 index 6e111edefc..0000000000 --- a/tensorflow/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -keras_applications >= 1.0.5 -keras_preprocessing >= 1.0.3 -- cgit v1.2.3 From e45f7ee4182d5e831026f329cff5da2596d6733a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 26 Sep 2018 05:01:43 -0700 Subject: Refactoring of nest value getters. PiperOrigin-RevId: 214587760 --- tensorflow/python/BUILD | 1 + tensorflow/python/util/util.cc | 305 ++++++++++++++++++++--------------------- 2 files changed, 147 insertions(+), 159 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 79f14466e6..410b3a553a 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -333,6 +333,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/python_runtime:headers", + "@com_google_absl//absl/memory", ], ) diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 562bbdcfeb..2087957b31 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -15,9 +15,11 @@ limitations under the License. #include "tensorflow/python/util/util.h" #include +#include #include #include +#include "absl/memory/memory.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" @@ -222,93 +224,136 @@ int IsSequenceHelper(PyObject* o) { return check_cache->CachedLookup(o); } -// Implements the same idea as tensorflow.util.nest._yield_value -// During construction we check if the iterable is a dictionary. -// If so, we construct a sequence from its sorted keys that will be used -// for iteration. -// If not, we construct a sequence directly from the iterable. -// At each step, we get the next element from the sequence and use it -// either as a key or return it directly. -// -// 'iterable' must not be modified while ValIterator is used. -class ValIterator { +// ValueIterator interface +class ValueIterator { + public: + virtual ~ValueIterator() {} + virtual Safe_PyObjectPtr next() = 0; + + bool valid() const { return is_valid_; } + + protected: + void invalidate() { is_valid_ = false; } + + private: + bool is_valid_ = true; +}; + +using ValueIteratorPtr = std::unique_ptr; + +// Iterate through dictionaries in a deterministic order by sorting the +// keys. 
Notice this means that we ignore the original order of +// `OrderedDict` instances. This is intentional, to avoid potential +// bugs caused by mixing ordered and plain dicts (e.g., flattening +// a dict but using a corresponding `OrderedDict` to pack it back). +class DictValueIterator : public ValueIterator { public: - explicit ValIterator(PyObject* iterable) - : dict_(nullptr), - mapping_(nullptr), - last_mapping_element_(nullptr), - seq_(nullptr), - index_(0) { - if (PyDict_Check(iterable)) { - dict_ = iterable; - // PyDict_Keys returns a list, which can be used with - // PySequence_Fast_GET_ITEM. - seq_ = PyDict_Keys(iterable); - // Iterate through dictionaries in a deterministic order by sorting the - // keys. Notice this means that we ignore the original order of - // `OrderedDict` instances. This is intentional, to avoid potential - // bugs caused by mixing ordered and plain dicts (e.g., flattening - // a dict but using a corresponding `OrderedDict` to pack it back). - PyList_Sort(seq_); - } else if (IsMappingHelper(iterable)) { - mapping_ = iterable; - seq_ = MappingKeys(iterable); - PyList_Sort(seq_); + explicit DictValueIterator(PyObject* dict) + : dict_(dict), keys_(PyDict_Keys(dict)) { + if (PyList_Sort(keys_.get()) == -1) { + invalidate(); } else { - seq_ = PySequence_Fast(iterable, ""); + iter_.reset(PyObject_GetIter(keys_.get())); } - size_ = PySequence_Fast_GET_SIZE(seq_); } - ~ValIterator() { Py_DECREF(seq_); } - - // Return a borrowed reference to the next element from iterable. - // Return nullptr when iteration is over. - PyObject* next() { - if (TF_PREDICT_FALSE(seq_ == nullptr)) { - return nullptr; - } - PyObject* element = nullptr; - if (index_ < size_) { - // Both PySequence_Fast_GET_ITEM and PyDict_GetItem return borrowed - // references. For general mappings, ValIterator keeps a reference to the - // last retrieved element (and decrefs it before producing the next - // element) to abstract away the borrowed/new difference. - element = PySequence_Fast_GET_ITEM(seq_, index_); - ++index_; - if (dict_ != nullptr) { - element = PyDict_GetItem(dict_, element); - if (element == nullptr) { - PyErr_SetString(PyExc_RuntimeError, - "Dictionary was modified during iteration over it"); - return nullptr; - } - } else if (mapping_ != nullptr) { - element = PyObject_GetItem(mapping_, element); - if (element == nullptr) { - PyErr_SetString(PyExc_RuntimeError, - "Mapping was modified during iteration over it"); - return nullptr; - } - last_mapping_element_.reset(element); + Safe_PyObjectPtr next() override { + Safe_PyObjectPtr result; + Safe_PyObjectPtr key(PyIter_Next(iter_.get())); + if (key) { + // PyDict_GetItem returns a borrowed reference. 
+ PyObject* elem = PyDict_GetItem(dict_, key.get()); + if (elem) { + Py_INCREF(elem); + result.reset(elem); + } else { + PyErr_SetString(PyExc_RuntimeError, + "Dictionary was modified during iteration over it"); } } - return element; + return result; } private: - // Special casing for things that pass PyDict_Check (faster, no Python calls) PyObject* dict_; + Safe_PyObjectPtr keys_; + Safe_PyObjectPtr iter_; +}; - // General mappings which have custom Python logic +// Iterate over mapping objects by sorting the keys first +class MappingValueIterator : public ValueIterator { + public: + explicit MappingValueIterator(PyObject* mapping) + : mapping_(mapping), keys_(MappingKeys(mapping)) { + if (!keys_ || PyList_Sort(keys_.get()) == -1) { + invalidate(); + } else { + iter_.reset(PyObject_GetIter(keys_.get())); + } + } + + Safe_PyObjectPtr next() override { + Safe_PyObjectPtr result; + Safe_PyObjectPtr key(PyIter_Next(iter_.get())); + if (key) { + // Unlike PyDict_GetItem, PyObject_GetItem returns a new reference. + PyObject* elem = PyObject_GetItem(mapping_, key.get()); + if (elem) { + result.reset(elem); + } else { + PyErr_SetString(PyExc_RuntimeError, + "Mapping was modified during iteration over it"); + } + } + return result; + } + + private: PyObject* mapping_; - Safe_PyObjectPtr last_mapping_element_; + Safe_PyObjectPtr keys_; + Safe_PyObjectPtr iter_; +}; + +// Iterate over a sequence, by index. +class SequenceValueIterator : public ValueIterator { + public: + explicit SequenceValueIterator(PyObject* iterable) + : seq_(PySequence_Fast(iterable, "")), + size_(PySequence_Fast_GET_SIZE(seq_.get())), + index_(0) {} - PyObject* seq_; - Py_ssize_t size_; + Safe_PyObjectPtr next() override { + Safe_PyObjectPtr result; + if (index_ < size_) { + // PySequence_Fast_GET_ITEM returns a borrowed reference. + PyObject* elem = PySequence_Fast_GET_ITEM(seq_.get(), index_); + ++index_; + Py_INCREF(elem); + result.reset(elem); + } + + return result; + } + + private: + Safe_PyObjectPtr seq_; + const Py_ssize_t size_; Py_ssize_t index_; }; +// Just return itself as a single item. +class SparseTensorValueIterator : public ValueIterator { + public: + explicit SparseTensorValueIterator(PyObject* tensor) : tensor_(tensor) { + Py_INCREF(tensor); + } + + Safe_PyObjectPtr next() override { return std::move(tensor_); } + + private: + Safe_PyObjectPtr tensor_; +}; + bool IsSparseTensorValueType(PyObject* o) { if (TF_PREDICT_FALSE(SparseTensorValueType == nullptr)) { return false; @@ -322,93 +367,33 @@ int IsSequenceForDataHelper(PyObject* o) { !IsSparseTensorValueType(o); } -bool GetNextValuesForDict(PyObject* nested, - std::vector* next_values) { - Safe_PyObjectPtr keys(PyDict_Keys(nested)); - if (PyList_Sort(keys.get()) == -1) return false; - Py_ssize_t size = PyList_Size(keys.get()); - for (Py_ssize_t i = 0; i < size; ++i) { - // We know that key and item will not be deleted because nested owns - // a reference to them and callers of flatten must not modify nested - // while the method is running. 
- PyObject* key = PyList_GET_ITEM(keys.get(), i); - PyObject* item = PyDict_GetItem(nested, key); - Py_INCREF(item); - next_values->emplace_back(item); - } - return true; -} - -bool GetNextValuesForMapping(PyObject* nested, - std::vector* next_values) { - Safe_PyObjectPtr keys(MappingKeys(nested)); - if (keys.get() == nullptr) { - return false; - } - if (PyList_Sort(keys.get()) == -1) return false; - Py_ssize_t size = PyList_Size(keys.get()); - for (Py_ssize_t i = 0; i < size; ++i) { - PyObject* key = PyList_GET_ITEM(keys.get(), i); - // Unlike PyDict_GetItem, PyObject_GetItem returns a new reference. - PyObject* item = PyObject_GetItem(nested, key); - next_values->emplace_back(item); - } - return true; -} - -bool GetNextValuesForIterable(PyObject* nested, - std::vector* next_values) { - PyObject* item; - PyObject* iterator = PyObject_GetIter(nested); - if (iterator == nullptr || PyErr_Occurred()) { - return false; - } - while ((item = PyIter_Next(iterator)) != nullptr) { - next_values->emplace_back(item); - } - Py_DECREF(iterator); - return true; -} - -// GetNextValues returns the values that the FlattenHelper function will recurse -// over next. -bool GetNextValues(PyObject* nested, - std::vector* next_values) { +ValueIteratorPtr GetValueIterator(PyObject* nested) { if (PyDict_Check(nested)) { - // if nested is dictionary, sort it by key and recurse on each value - return GetNextValuesForDict(nested, next_values); + return absl::make_unique(nested); } else if (IsMappingHelper(nested)) { - // same treatment as dictionaries, but for custom mapping types - return GetNextValuesForMapping(nested, next_values); + return absl::make_unique(nested); + } else { + return absl::make_unique(nested); } - // iterate and recurse - return GetNextValuesForIterable(nested, next_values); } -// Similar to above, just specialized for the functions in the data pacakage. -bool GetNextValuesForData(PyObject* nested, - std::vector* next_values) { +// Similar to above, just specialized for the functions in the data package. +ValueIteratorPtr GetValueIteratorForData(PyObject* nested) { if (PyDict_Check(nested)) { - // if nested is dictionary, sort it by key and recurse on each value - return GetNextValuesForDict(nested, next_values); + return absl::make_unique(nested); } else if (IsMappingHelper(nested)) { - // same treatment as dictionaries, but for custom mapping types - return GetNextValuesForMapping(nested, next_values); + return absl::make_unique(nested); } else if (IsSparseTensorValueType(nested)) { - // if nested is a SparseTensorValue, just return itself as a single item - Py_INCREF(nested); - next_values->emplace_back(nested); - return true; + return absl::make_unique(nested); + } else { + return absl::make_unique(nested); } - // iterate and recurse - return GetNextValuesForIterable(nested, next_values); } bool FlattenHelper( PyObject* nested, PyObject* list, const std::function& is_sequence_helper, - const std::function*)>& - next_values_getter) { + const std::function& value_iterator_getter) { // if nested is not a sequence, append itself and exit int is_seq = is_sequence_helper(nested); if (is_seq == -1) return false; @@ -416,16 +401,15 @@ bool FlattenHelper( return PyList_Append(list, nested) != -1; } - std::vector next_values; - // Get the next values to recurse over. 
- if (!next_values_getter(nested, &next_values)) return false; + ValueIteratorPtr iter = value_iterator_getter(nested); + if (!iter->valid()) return false; - for (const auto& item : next_values) { + for (Safe_PyObjectPtr item = iter->next(); item; item = iter->next()) { if (Py_EnterRecursiveCall(" in flatten")) { return false; } - const bool success = - FlattenHelper(item.get(), list, is_sequence_helper, next_values_getter); + const bool success = FlattenHelper(item.get(), list, is_sequence_helper, + value_iterator_getter); Py_LeaveRecursiveCall(); if (!success) { return false; @@ -579,22 +563,25 @@ bool AssertSameStructureHelper( } } - ValIterator iter1(o1); - ValIterator iter2(o2); + ValueIteratorPtr iter1 = GetValueIterator(o1); + ValueIteratorPtr iter2 = GetValueIterator(o2); + + if (!iter1->valid() || !iter2->valid()) return false; while (true) { - PyObject* v1 = iter1.next(); - PyObject* v2 = iter2.next(); - if (v1 != nullptr && v2 != nullptr) { + Safe_PyObjectPtr v1 = iter1->next(); + Safe_PyObjectPtr v2 = iter2->next(); + if (v1 && v2) { if (Py_EnterRecursiveCall(" in assert_same_structure")) { return false; } - bool no_internal_errors = AssertSameStructureHelper( - v1, v2, check_types, error_msg, is_type_error, is_sequence_helper); + bool no_internal_errors = + AssertSameStructureHelper(v1.get(), v2.get(), check_types, error_msg, + is_type_error, is_sequence_helper); Py_LeaveRecursiveCall(); if (!no_internal_errors) return false; if (!error_msg->empty()) return true; - } else if (v1 == nullptr && v2 == nullptr) { + } else if (!v1 && !v2) { // Done with all recursive calls. Structure matched. return true; } else { @@ -655,7 +642,7 @@ bool IsMapping(PyObject* o) { return IsMappingHelper(o) == 1; } PyObject* Flatten(PyObject* nested) { PyObject* list = PyList_New(0); - if (FlattenHelper(nested, list, IsSequenceHelper, GetNextValues)) { + if (FlattenHelper(nested, list, IsSequenceHelper, GetValueIterator)) { return list; } else { Py_DECREF(list); @@ -668,7 +655,7 @@ bool IsSequenceForData(PyObject* o) { return IsSequenceForDataHelper(o) == 1; } PyObject* FlattenForData(PyObject* nested) { PyObject* list = PyList_New(0); if (FlattenHelper(nested, list, IsSequenceForDataHelper, - GetNextValuesForData)) { + GetValueIteratorForData)) { return list; } else { Py_DECREF(list); -- cgit v1.2.3