diff options
26 files changed, 870 insertions, 92 deletions
diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt index 5ae1a209b9..1f86288cf9 100644 --- a/tensorflow/contrib/android/cmake/CMakeLists.txt +++ b/tensorflow/contrib/android/cmake/CMakeLists.txt @@ -26,6 +26,10 @@ add_library(lib_proto STATIC IMPORTED ) set_target_properties(lib_proto PROPERTIES IMPORTED_LOCATION ${PREBUILT_DIR}/protobuf/lib/libprotobuf.a) +add_library(lib_nsync STATIC IMPORTED ) +set_target_properties(lib_nsync PROPERTIES IMPORTED_LOCATION + ${TARGET_NSYNC_LIB}) + add_library(lib_tf STATIC IMPORTED ) set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION ${PREBUILT_DIR}/lib/libtensorflow-core.a) @@ -62,10 +66,13 @@ target_link_libraries(tensorflow_inference m z lib_tf - lib_proto) + lib_proto + lib_nsync) + include_directories( ${PREBUILT_DIR}/proto ${PREBUILT_DIR}/protobuf/include + ${PREBUILT_DIR}/nsync/public ${TENSORFLOW_ROOT_DIR}/tensorflow/contrib/makefile/downloads/eigen ${TENSORFLOW_ROOT_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/..) 
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 3cbb430f0b..f6a47d26c8 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -121,6 +121,7 @@ include(jsoncpp) include(farmhash) include(fft2d) include(highwayhash) +include(nsync) include(protobuf) include(re2) include(cub) @@ -138,6 +139,7 @@ set(tensorflow_EXTERNAL_LIBRARIES ${farmhash_STATIC_LIBRARIES} ${fft2d_STATIC_LIBRARIES} ${highwayhash_STATIC_LIBRARIES} + ${nsync_STATIC_LIBRARIES} ${protobuf_STATIC_LIBRARIES} ${re2_STATIC_LIBRARIES} ) @@ -150,6 +152,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES jsoncpp farmhash_copy_headers_to_destination highwayhash_copy_headers_to_destination + nsync_copy_headers_to_destination protobuf eigen gemmlowp @@ -174,6 +177,7 @@ include_directories( ${farmhash_INCLUDE_DIR} ${highwayhash_INCLUDE_DIR} ${cub_INCLUDE_DIR} + ${nsync_INCLUDE_DIR} ${PROTOBUF_INCLUDE_DIRS} ${re2_INCLUDE_DIR} ) diff --git a/tensorflow/contrib/cmake/external/nsync.cmake b/tensorflow/contrib/cmake/external/nsync.cmake new file mode 100644 index 0000000000..2c42377f50 --- /dev/null +++ b/tensorflow/contrib/cmake/external/nsync.cmake @@ -0,0 +1,54 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +include (ExternalProject) + +set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public) +set(nsync_URL https://github.com/google/nsync) +set(nsync_TAG 394e71f0ebeed6788ae6c84d42c1bedf6e1ee9f7) +set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync) +set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install) + +# put nsync includes in the directory where they are expected +add_custom_target(nsync_create_destination_dir + COMMAND ${CMAKE_COMMAND} -E make_directory ${nsync_INCLUDE_DIR} + DEPENDS nsync) + +add_custom_target(nsync_copy_headers_to_destination + DEPENDS nsync_create_destination_dir) + +if(WIN32) + set(nsync_HEADERS "${nsync_BUILD}/public/*.h") + set(nsync_STATIC_LIBRARIES ${nsync_INSTALL}/lib/nsync.lib) +else() + set(nsync_HEADERS "${nsync_BUILD}/public/*.h") + set(nsync_STATIC_LIBRARIES ${nsync_INSTALL}/lib/libnsync.a) +endif() + +ExternalProject_Add(nsync + PREFIX nsync + GIT_REPOSITORY ${nsync_URL} + GIT_TAG ${nsync_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/nsync/CMakeLists.txt ${nsync_BUILD} + INSTALL_DIR ${nsync_INSTALL} + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_INSTALL_PREFIX:STRING=${nsync_INSTALL} + -DNSYNC_LANGUAGE:STRING=c++11) + +add_custom_command(TARGET nsync_copy_headers_to_destination PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory ${nsync_INSTALL}/include/ ${nsync_INCLUDE_DIR}/) diff --git a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt new file mode 100644 index 0000000000..3bbeb2ec0c --- /dev/null +++ b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt @@ -0,0 +1,291 @@ +cmake_minimum_required (VERSION 2.8.12) + +# nsync provides portable synchronization primitives, such as mutexes and +# 
condition variables. +project (nsync) + +# Set variable NSYNC_LANGUAGE to "c++11" to build with C++11 +# rather than C. + +# Some builds need position-independent code. +set (CMAKE_POSITION_INDEPENDENT_CODE ON) + +# ----------------------------------------------------------------- +# Platform dependencies + +# Many platforms use these posix related sources; even Win32. +set (NSYNC_POSIX_SRC + "platform/posix/src/nsync_panic.c" + "platform/posix/src/per_thread_waiter.c" + "platform/posix/src/time_rep.c" + "platform/posix/src/yield.c" +) + +# Many of the string matches below use a literal "X" suffix on both sides. +# This is because some versions of cmake treat (for example) "MSVC" (in quotes) +# as a reference to the variable MSVC, thus the expression +# "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC" +# is false when ${CMAKE_C_COMPILER_ID} has the value "MSVC"! See +# https://cmake.org/cmake/help/v3.1/policy/CMP0054.html + +# Pick the include directory for the operating system. +if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") + include_directories ("${PROJECT_SOURCE_DIR}/platform/c++11") + add_definitions ("-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11") + set (NSYNC_OS_CPP_SRC + "platform/c++11/src/nsync_semaphore_mutex.cc" + "platform/c++11/src/per_thread_waiter.cc" + "platform/c++11/src/yield.cc" + "platform/c++11/src/time_rep_timespec.cc" + "platform/c++11/src/nsync_panic.cc" + ) + if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/win32") + add_compile_options ("/TP") + set (NSYNC_OS_SRC + "platform/win32/src/clock_gettime.c" + "platform/win32/src/pthread_key_win32.cc" + ${NSYNC_OS_CPP_SRC} + ) + set (NSYNC_TEST_OS_SRC + "platform/win32/src/start_thread.c" + ) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/macos") + add_compile_options ("-std=c++11") + set (NSYNC_OS_SRC + ${NSYNC_OS_CPP_SRC} + ) + set (NSYNC_TEST_OS_SRC + 
"platform/posix/src/start_thread.c" + ) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "LinuxX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") + add_compile_options ("-std=c++11") + set (NSYNC_OS_SRC + ${NSYNC_OS_CPP_SRC} + ) + set (NSYNC_TEST_OS_SRC + "platform/posix/src/start_thread.c" + ) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "NetBSDX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") + add_compile_options ("-std=c++11") + set (NSYNC_OS_SRC + ${NSYNC_OS_CPP_SRC} + ) + set (NSYNC_TEST_OS_SRC + "platform/posix/src/start_thread.c" + ) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "FreeBSDX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") + add_compile_options ("-std=c++11") + set (NSYNC_OS_SRC + ${NSYNC_OS_CPP_SRC} + ) + set (NSYNC_TEST_OS_SRC + "platform/posix/src/start_thread.c" + ) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "OpenBSDX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") + add_compile_options ("-std=c++11") + set (NSYNC_OS_SRC + ${NSYNC_OS_CPP_SRC} + ) + set (NSYNC_TEST_OS_SRC + "platform/posix/src/start_thread.c" + ) + endif () +endif () + +# Pick the include directory for the compiler. 
+if ("${CMAKE_C_COMPILER_ID}X" STREQUAL "GNUX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/gcc") + set (THREADS_HAVE_PTHREAD_ARG ON) +elseif ("${CMAKE_C_COMPILER_ID}X" STREQUAL "ClangX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/clang") + set (THREADS_HAVE_PTHREAD_ARG ON) +elseif ("${CMAKE_C_COMPILER_ID}X" STREQUAL "MSVCX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/msvc") +else () + message (WARNING "CMAKE_C_COMPILER_ID (${CMAKE_C_COMPILER_ID}) matched NOTHING") +endif () + +if (NOT "${NSYNC_LANGUAGE}X" STREQUAL "c++11X") + if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/win32") + set (NSYNC_OS_SRC + ${NSYNC_POSIX_SRC} + "platform/win32/src/clock_gettime.c" + "platform/win32/src/init_callback_win32.c" + "platform/win32/src/nanosleep.c" + "platform/win32/src/nsync_semaphore_win32.c" + "platform/win32/src/pthread_cond_timedwait_win32.c" + "platform/win32/src/pthread_key_win32.cc" + ) + set (NSYNC_TEST_OS_SRC + "platform/win32/src/start_thread.c" + ) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/macos") + set (NSYNC_POSIX ON) + include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "LinuxX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/linux") + set (NSYNC_POSIX ON) + set (NSYNC_OS_EXTRA_SRC + "platform/linux/src/nsync_semaphore_futex.c" + ) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "NetBSDX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/netbsd") + set (NSYNC_POSIX ON) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "FreeBSDX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/freebsd") + set (NSYNC_POSIX ON) + elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "OpenBSDX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/openbsd") + set (NSYNC_POSIX ON) + endif () +endif () + +if (NSYNC_POSIX) + include_directories 
("${PROJECT_SOURCE_DIR}/platform/posix") + set (NSYNC_OS_SRC + ${NSYNC_POSIX_SRC} + ${NSYNC_OS_EXTRA_SRC} + ) + set (NSYNC_TEST_OS_SRC + "platform/posix/src/start_thread.c" + ) +endif () + +# Pick the include directory for the architecture. +if (("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "x86_64X") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "amd64X") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "AMD64X")) + include_directories ("${PROJECT_SOURCE_DIR}/platform/x86_64") +elseif (("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "x86_32X") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "i386X") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "i686X")) + include_directories ("${PROJECT_SOURCE_DIR}/platform/x86_32") +elseif (("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "armv6lX") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "armv7lX") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "armX")) + include_directories ("${PROJECT_SOURCE_DIR}/platform/arm") +elseif (("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "aarch64X") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "arm64X")) + include_directories ("${PROJECT_SOURCE_DIR}/platform/aarch64") +elseif (("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "ppcX") OR + ("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "ppc32X")) + include_directories ("${PROJECT_SOURCE_DIR}/platform/ppc32") +elseif (("${CMAKE_SYSTEM_PROCESSOR}X" STREQUAL "ppc64X")) + include_directories ("${PROJECT_SOURCE_DIR}/platform/ppc64") +endif () + +# Windows uses some include files from the posix directory also. 
+if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX") + include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") +endif () + +# ----------------------------------------------------------------- + +include_directories ("${PROJECT_SOURCE_DIR}/public") +include_directories ("${PROJECT_SOURCE_DIR}/internal") + +set (NSYNC_SRC + "internal/common.c" + "internal/counter.c" + "internal/cv.c" + "internal/debug.c" + "internal/dll.c" + "internal/mu.c" + "internal/mu_wait.c" + "internal/note.c" + "internal/once.c" + "internal/sem_wait.c" + "internal/time_internal.c" + "internal/wait.c" + ${NSYNC_OS_SRC} +) +add_library (nsync ${NSYNC_SRC}) + +set (NSYNC_TEST_SRC + "testing/array.c" + "testing/atm_log.c" + "testing/closure.c" + "testing/smprintf.c" + "testing/testing.c" + "testing/time_extra.c" + ${NSYNC_TEST_OS_SRC} +) +add_library (nsync_test ${NSYNC_TEST_SRC}) + +set (NSYNC_TESTS + "counter_test" + "cv_mu_timeout_stress_test" + "cv_test" + "cv_wait_example_test" + "dll_test" + "mu_starvation_test" + "mu_test" + "mu_wait_example_test" + "mu_wait_test" + "note_test" + "once_test" + "pingpong_test" + "wait_test" +) + +if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") + foreach (s IN ITEMS ${NSYNC_SRC} ${NSYNC_TEST_SRC}) + SET_SOURCE_FILES_PROPERTIES ("${s}" PROPERTIES LANGUAGE CXX) + endforeach (s) + foreach (t IN ITEMS ${NSYNC_TESTS}) + SET_SOURCE_FILES_PROPERTIES ("testing/${t}.c" PROPERTIES LANGUAGE CXX) + endforeach (t) +endif () + +enable_testing () +foreach (t IN ITEMS ${NSYNC_TESTS}) + add_executable (${t} "testing/${t}.c") +endforeach (t) + +find_package (Threads REQUIRED) +set (THREADS_PREFER_PTHREAD_FLAG ON) +foreach (t IN ITEMS "nsync" "nsync_test" ${NSYNC_TESTS}) + if (THREADS_HAVE_PTHREAD_ARG) + target_compile_options (${t} PUBLIC "-pthread") + endif () + if (CMAKE_THREAD_LIBS_INIT) + target_link_libraries (${t} "${CMAKE_THREAD_LIBS_INIT}") + endif () +endforeach (t) + +foreach (t IN ITEMS ${NSYNC_TESTS}) + target_link_libraries (${t} nsync_test nsync) + add_test (NAME 
${t} COMMAND ${t}) +endforeach (t) + +install (TARGETS nsync + LIBRARY DESTINATION lib COMPONENT RuntimeLibraries + ARCHIVE DESTINATION lib COMPONENT Development) + +set (NSYNC_INCLUDES + "public/nsync.h" + "public/nsync_atomic.h" + "public/nsync_counter.h" + "public/nsync_cpp.h" + "public/nsync_cv.h" + "public/nsync_debug.h" + "public/nsync_mu.h" + "public/nsync_mu_wait.h" + "public/nsync_note.h" + "public/nsync_once.h" + "public/nsync_time.h" + "public/nsync_time_internal.h" + "public/nsync_waiter.h" +) + +foreach (NSYNC_INCLUDE ${NSYNC_INCLUDES}) + install (FILES ${NSYNC_INCLUDE} DESTINATION include COMPONENT Development) +endforeach () diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 2e45ddad54..a4f7453ed5 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -74,6 +74,7 @@ HOST_INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/ \ -I$(MAKEFILE_DIR)/downloads/eigen \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(HOST_GENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include @@ -83,6 +84,7 @@ endif HOST_INCLUDES += -I/usr/local/include HOST_LIBS := \ +$(HOST_NSYNC_LIB) \ -lstdc++ \ -lprotobuf \ -lpthread \ @@ -153,6 +155,7 @@ INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/ \ -I$(MAKEFILE_DIR)/downloads/eigen \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) @@ -163,6 +166,7 @@ endif INCLUDES += -I/usr/local/include LIBS := \ +$(TARGET_NSYNC_LIB) \ -lstdc++ \ -lprotobuf \ -lz \ @@ -249,11 +253,13 @@ ifeq ($(TARGET),ANDROID) -I$(MAKEFILE_DIR)/downloads/ \ -I$(MAKEFILE_DIR)/downloads/eigen \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/gen/protobuf/include \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) LIBS := \ +$(TARGET_NSYNC_LIB) \ -lgnustl_static \ 
-lprotobuf \ -llog \ diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 0306ecb214..835d68489e 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -104,6 +104,9 @@ Then, execute the following: ```bash tensorflow/contrib/makefile/download_dependencies.sh tensorflow/contrib/makefile/compile_android_protobuf.sh -c +export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` +export TARGET_NSYNC_LIB=`CC_PREFIX="${CC_PREFIX}" NDK_ROOT="${NDK_ROOT}" \ + tensorflow/contrib/makefile/compile_nsync.sh -t android -a armeabi-v7a` make -f tensorflow/contrib/makefile/Makefile TARGET=ANDROID ``` @@ -196,6 +199,13 @@ Next, you will need to compile protobufs for iOS: tensorflow/contrib/makefile/compile_ios_protobuf.sh ``` +Then, you will need to compile the nsync library for iOS: + +```bash +export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` +export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +``` + Then, you can run the makefile specifying iOS as the target, along with the architecture you want to build for: @@ -220,7 +230,8 @@ library in a simple app. #### Universal binaries In some situations, you will need a universal library. In that case, you will -still need to run `compile_ios_protobuf.sh`, but this time follow it with: +still need to run `compile_ios_protobuf.sh` and `compile_nsync.sh`, but this +time follow it with: ```bash compile_ios_tensorflow.sh @@ -258,6 +269,8 @@ make sudo make install sudo ldconfig # refresh shared library cache cd ../../../../.. 
+export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` +export TARGET_NSYNC_LIB="$HOST_NSYNC_LIB" ``` Once that's done, you can use make to build the library and example: diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh index 7f0c3f38c2..9944f71950 100755 --- a/tensorflow/contrib/makefile/build_all_android.sh +++ b/tensorflow/contrib/makefile/build_all_android.sh @@ -67,6 +67,13 @@ else make -f tensorflow/contrib/makefile/Makefile clean_except_protobuf_libs fi +# Compile nsync for the host and the target Android device architecture. +# Don't use export var=`something` syntax; it swallows the exit status. +HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` +TARGET_NSYNC_LIB=`CC_PREFIX="${CC_PREFIX}" NDK_ROOT="${NDK_ROOT}" \ + tensorflow/contrib/makefile/compile_nsync.sh -t android -a armeabi-v7a` +export HOST_NSYNC_LIB TARGET_NSYNC_LIB + if [[ ! -z "${HEXAGON_LIB_PATH}" ]]; then echo "Copy hexagon libraries from ${HEXAGON_LIB_PATH}" @@ -92,6 +99,7 @@ fi if [[ -z "${BUILD_TARGET}" ]]; then make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \ + HOST_NSYNC_LIB="$HOST_NSYNC_LIB" TARGET_NSYNC_LIB="$TARGET_NSYNC_LIB" \ HEXAGON_LIBS="${HEXAGON_LIBS}" HEXAGON_INCLUDE="${HEXAGON_INCLUDE}" \ SUB_MAKEFILES="${SUB_MAKEFILES}" ${EXTRA_MAKE_ARGS[@]} else @@ -99,6 +107,7 @@ else # passed to make in a single build_all_android.sh invocation. 
make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \ TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \ + HOST_NSYNC_LIB="$HOST_NSYNC_LIB" TARGET_NSYNC_LIB="$TARGET_NSYNC_LIB" \ HEXAGON_LIBS="${HEXAGON_LIBS}" HEXAGON_INCLUDE="${HEXAGON_INCLUDE}" \ SUB_MAKEFILES="${SUB_MAKEFILES}" ${EXTRA_MAKE_ARGS[@]} ${BUILD_TARGET} fi diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index a0f5652854..a49bbe4565 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -47,6 +47,12 @@ tensorflow/contrib/makefile/download_dependencies.sh # Compile protobuf for the target iOS device architectures. tensorflow/contrib/makefile/compile_ios_protobuf.sh +# Compile nsync for the target iOS device architectures. +# Don't use export var=`something` syntax; it swallows the exit status. +HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` +TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` +export HOST_NSYNC_LIB TARGET_NSYNC_LIB + # Build the iOS TensorFlow libraries. tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3" diff --git a/tensorflow/contrib/makefile/build_all_linux.sh b/tensorflow/contrib/makefile/build_all_linux.sh index 6bf1c6d683..5d73f697f4 100755 --- a/tensorflow/contrib/makefile/build_all_linux.sh +++ b/tensorflow/contrib/makefile/build_all_linux.sh @@ -32,6 +32,12 @@ rm -rf tensorflow/contrib/makefile/downloads # Pull down the required versions of the frameworks we need. tensorflow/contrib/makefile/download_dependencies.sh +# Compile nsync. +# Don't use export var=`something` syntax; it swallows the exit status. +HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` +TARGET_NSYNC_LIB="$HOST_NSYNC_LIB" +export HOST_NSYNC_LIB TARGET_NSYNC_LIB + # Compile protobuf. 
tensorflow/contrib/makefile/compile_linux_protobuf.sh diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh new file mode 100755 index 0000000000..207661ee46 --- /dev/null +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -0,0 +1,310 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ============================================================================== + +# Compile the nsync library for the platforms given as arguments. + +set -e + +prog=compile_nsync.sh +android_api_version=21 +default_android_arch=armeabi-v7a +default_ios_arch="i386 x86_64 armv7 armv7s arm64" + +usage="usage: $prog [-t linux|ios|android|macos|native] + [-a architecture] [-v android_api_version] + +A script to build nsync for tensorflow. +This script can be run on Linux or MacOS host platforms, and can target +Linux, MacOS, iOS, or Android. + +Options: +-t target_platform +The default target platform is the native host platform. + +-a architecture +For Android and iOS target platforms, specify which architecture +to target. +For iOS, the default is: $default_ios_arch. +For Android, the default is: $default_android_arch. + +-v android_api_version +Specify the Android API version; the default is $android_api_version." + +# Deduce host platform. 
+host_platform= +nsync_path= +case `uname -s` in +Linux) host_platform=linux android_host=linux;; +Darwin) host_platform=macos android_host=darwin;; +*) echo "$prog: can't deduce host platform" >&2; exit 2;; +esac +host_arch=`uname -m` +case "$host_arch" in i[345678]86) host_arch=x86_32;; esac + +# Parse command line. +target_platform=native # Default is to build for the host. +target_arch=default +while + arg="${1-}" + case "$arg" in + -*) case "$arg" in -*t*) target_platform="${2?"$usage"}"; shift; esac + case "$arg" in -*a*) target_arch="${2?"$usage"}"; shift; esac + case "$arg" in -*v*) android_api_version="${2?"$usage"}"; shift; esac + case "$arg" in -*[!atv]*) echo "$usage" >&2; exit 2;; esac;; + "") break;; + *) echo "$usage" >&2; exit 2;; + esac +do + shift +done + +# Sanity check the target platform. +case "$target_platform" in +native) target_platform="$host_platform";; +esac + +# Change directory to the root of the source tree. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "${SCRIPT_DIR}/../../.." + +nsync_builds_dir=tensorflow/contrib/makefile/downloads/nsync/builds + +case "$target_platform" in +ios) case "$target_arch" in + default) archs="$default_ios_arch";; + *) archs="$target_arch";; + esac + ;; +android) case "$target_arch" in + default) archs="$default_android_arch";; + *) archs="$target_arch";; + esac + ;; +*) archs="$target_arch";; +esac + +# For ios, the library names for the CPU types accumulate in $platform_libs +platform_libs= + +# Compile nsync. +for arch in $archs; do + nsync_platform_dir="$nsync_builds_dir/$arch.$target_platform.c++11" + + # Get Makefile for target. 
+ case "$target_platform" in + linux) makefile=' + CC=${CC_PREFIX} g++ + PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \ + -I../../platform/c++11 -I../../platform/gcc \ + -I../../platform/posix -pthread + PLATFORM_CFLAGS=-std=c++11 -Werror -Wall -Wextra -pedantic + PLATFORM_LDFLAGS=-pthread + MKDEP=${CC} -M -std=c++11 + PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ + ../../platform/c++11/src/per_thread_waiter.cc \ + ../../platform/c++11/src/yield.cc \ + ../../platform/c++11/src/time_rep_timespec.cc \ + ../../platform/c++11/src/nsync_panic.cc + PLATFORM_OBJS=nsync_semaphore_mutex.o per_thread_waiter.o yield.o \ + time_rep_timespec.o nsync_panic.o + TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc + TEST_PLATFORM_OBJS=start_thread.o + include ../../platform/posix/make.common + include dependfile + ';; + + ios) xcode=/Applications/Xcode.app/Contents/Developer/Platforms + arch_flags= + case "$arch" in + i386|x86_64) + arch_flags="$arch_flags -mios-simulator-version-min=8.0" + arch_flags="$arch_flags -isysroot $xcode/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator10.0.sdk" + ;; + *) + arch_flags="$arch_flags -miphoneos-version-min=8.0" + arch_flags="$arch_flags -isysroot $xcode/iPhoneOS.platform/Developer/SDKs/iPhoneOS10.0.sdk" + ;; + esac + makefile=' + CC=${CC_PREFIX} clang++ + PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \ + -I../../platform/c++11 -I../../platform/gcc_no_tls \ + -I../../platform/macos -I../../platform/posix -pthread + PLATFORM_CFLAGS=-arch '"$arch"' -fno-exceptions -stdlib=libc++ \ + -fembed-bitcode '"$arch_flags"' -fPIC -x c++ \ + -std=c++11 -Werror -Wall -Wextra -pedantic + PLATFORM_LDFLAGS=-pthread + MKDEP=${CC} -x c++ -M -std=c++11 + PLATFORM_C=../../platform/posix/src/clock_gettime.c \ + ../../platform/c++11/src/nsync_semaphore_mutex.cc \ + ../../platform/posix/src/per_thread_waiter.c \ + ../../platform/c++11/src/yield.cc \ + ../../platform/c++11/src/time_rep_timespec.cc 
\ + ../../platform/c++11/src/nsync_panic.cc + PLATFORM_OBJS=clock_gettime.o nsync_semaphore_mutex.o per_thread_waiter.o \ + yield.o time_rep_timespec.o nsync_panic.o + TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc + TEST_PLATFORM_OBJS=start_thread.o + include ../../platform/posix/make.common + include dependfile + ';; + + macos) makefile=' + CC=${CC_PREFIX} clang++ + PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \ + -I../../platform/c++11 -I../../platform/gcc \ + -I../../platform/macos -I../../platform/posix -pthread + PLATFORM_CFLAGS=-x c++ -std=c++11 -Werror -Wall -Wextra -pedantic + PLATFORM_LDFLAGS=-pthread + MKDEP=${CC} -x c++ -M -std=c++11 + PLATFORM_C=../../platform/posix/src/clock_gettime.c \ + ../../platform/c++11/src/nsync_semaphore_mutex.cc \ + ../../platform/posix/src/per_thread_waiter.c \ + ../../platform/c++11/src/yield.cc \ + ../../platform/c++11/src/time_rep_timespec.cc \ + ../../platform/c++11/src/nsync_panic.cc + PLATFORM_OBJS=clock_gettime.o nsync_semaphore_mutex.o per_thread_waiter.o \ + yield.o time_rep_timespec.o nsync_panic.o + TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc + TEST_PLATFORM_OBJS=start_thread.o + include ../../platform/posix/make.common + include dependfile + ';; + + android) + # The Android build uses many different names for the same + # platform in different parts of the tree, so things get messy here. + + # Make $android_os_arch be the OS-arch name for the host + # binaries used in the NDK tree. 
+ case "$host_platform" in + linux) android_os_arch=linux;; + macos) android_os_arch=darwin;; + *) android_os_arch="$host_platform";; + esac + case "$host_arch" in + x86_32) android_os_arch="$android_os_arch"-x86;; + *) android_os_arch="$android_os_arch-$host_arch";; + esac + + case "$arch" in + arm64-v8a) toolchain="aarch64-linux-android-4.9" + sysroot_arch="arm64" + bin_prefix="aarch64-linux-android" + march_option= + ;; + armeabi) toolchain="arm-linux-androideabi-4.9" + sysroot_arch="arm" + bin_prefix="arm-linux-androideabi" + march_option="-mfloat-abi=softfp -mfpu=neon" # FP flags are 32-bit-ARM-only; other ABIs reject them + ;; + armeabi-v7a) toolchain="arm-linux-androideabi-4.9" + sysroot_arch="arm" + bin_prefix="arm-linux-androideabi" + march_option="-march=armv7-a -mfloat-abi=softfp -mfpu=neon" + ;; + armeabi-v7a-hard) toolchain="arm-linux-androideabi-4.9" + sysroot_arch="arm" + bin_prefix="arm-linux-androideabi" + march_option="-march=armv7-a -mfloat-abi=softfp -mfpu=neon" + ;; + mips) toolchain="mipsel-linux-android-4.9" + sysroot_arch="mips" + bin_prefix="mipsel-linux-android" + march_option= + ;; + mips64) toolchain="mips64el-linux-android-4.9" + sysroot_arch="mips64" + bin_prefix="mips64el-linux-android" + march_option= + ;; + x86) toolchain="x86-4.9" + sysroot_arch="x86" + bin_prefix="i686-linux-android" + march_option= + ;; + x86_64) toolchain="x86_64-4.9" + sysroot_arch="x86_64" + bin_prefix="x86_64-linux-android" + march_option= + ;; + *) echo "android is not supported for $arch" >&2 + echo "$usage" >&2 + exit 2 + ;; + esac + + + android_target_platform=armeabi + case "$NDK_ROOT" in + "") echo "$prog: requires \$NDK_ROOT for android build" >&2 + exit 2;; + esac + + makefile=' + CC=${CC_PREFIX} \ + ${NDK_ROOT}/toolchains/'"$toolchain"'/prebuilt/'"$android_os_arch"'/bin/'"$bin_prefix"'-g++ + PLATFORM_CPPFLAGS=--sysroot \ + $(NDK_ROOT)/platforms/android-'"$android_api_version"'/arch-'"$sysroot_arch"' \ + -DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \ + -I$(NDK_ROOT)/sources/android/support/include \ + -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \ + 
-I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \ + -I../../platform/c++11 -I../../platform/gcc \ + -I../../platform/posix -pthread + PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' \ + -fPIE + PLATFORM_LDFLAGS=-pthread + MKDEP=${CC} -M -std=c++11 + PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ + ../../platform/c++11/src/per_thread_waiter.cc \ + ../../platform/c++11/src/yield.cc \ + ../../platform/c++11/src/time_rep_timespec.cc \ + ../../platform/c++11/src/nsync_panic.cc + PLATFORM_OBJS=nsync_semaphore_mutex.o per_thread_waiter.o yield.o \ + time_rep_timespec.o nsync_panic.o + TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc + TEST_PLATFORM_OBJS=start_thread.o + include ../../platform/posix/make.common + include dependfile + ';; + + *) echo "$usage" >&2; exit 2;; + esac + + if [ ! -d "$nsync_platform_dir" ]; then + mkdir "$nsync_platform_dir" + echo "$makefile" | sed 's,^[ \t]*,,' > "$nsync_platform_dir/Makefile" + touch "$nsync_platform_dir/dependfile" + fi + if (cd "$nsync_platform_dir" && make depend nsync.a >&2); then + case "$target_platform" in + ios) platform_libs="$platform_libs '$nsync_platform_dir/nsync.a'";; + *) echo "$nsync_platform_dir/nsync.a";; + esac + else + exit 2 # The if-statement suppresses the "set -e" on the "make". 
+ fi +done + +case "$target_platform" in +ios) nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11" + mkdir -p "$nsync_platform_dir" # -p: the directory survives from a previous run; plain mkdir would abort under "set -e" + eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a' + echo "$nsync_platform_dir/nsync.a" + ;; +esac diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index f123111df8..bb30a3b5a7 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -22,6 +22,7 @@ BZL_FILE_PATH=tensorflow/workspace.bzl EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" GEMMLOWP_URL="$(grep -o 'http.*github.com/google/gemmlowp/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" +NSYNC_URL="$(grep -o 'http.*github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" PROTOBUF_URL="$(grep -o 'http.*github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" RE2_URL="$(grep -o 'http.*github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" @@ -56,6 +57,7 @@ download_and_extract() { download_and_extract "${EIGEN_URL}" "${DOWNLOADS_DIR}/eigen" download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp" download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest" +download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync" download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf" download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2" diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 3bff497975..8322f0a897 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -67,6 +67,7 @@ load( "if_not_android_mips_and_mips64", "if_ios", "if_linux_x86_64", + "if_mobile", "if_not_mobile", 
"if_not_windows", "tf_copts", @@ -514,8 +515,10 @@ cc_library( visibility = ["//visibility:public"], deps = [ - "//tensorflow/core/platform/default/build_config:minimal", + "@nsync//:nsync_cpp", + ] + [ "//third_party/eigen3", + "//tensorflow/core/platform/default/build_config:minimal", ], ) @@ -954,6 +957,7 @@ cc_library( deps = [ ":protos_cc", "//third_party/eigen3", + "@nsync//:nsync_cpp", ], alwayslink = 1, ) @@ -976,6 +980,7 @@ cc_library( ":protos_cc", "//third_party/eigen3", "@gemmlowp//:gemmlowp", + "@nsync//:nsync_cpp", ], alwayslink = 1, ) @@ -1058,6 +1063,7 @@ cc_library( deps = [ ":protos_cc", "//third_party/eigen3", + "@nsync//:nsync_cpp", ], alwayslink = 1, ) @@ -1620,10 +1626,12 @@ tf_cuda_library( cc_header_only_library( name = "framework_headers_lib", + includes = ["../../external/nsync/public"], visibility = ["//visibility:public"], deps = [ ":framework", ":reader_base", + "@nsync//:nsync_headers", ], ) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 0af38affd5..917e13fd4c 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -232,7 +232,7 @@ def tf_additional_lib_defines(): }) def tf_additional_lib_deps(): - return select({ + return ["@nsync//:nsync_cpp"] + select({ "//tensorflow:with_jemalloc_linux_x86_64": ["@jemalloc"], "//tensorflow:with_jemalloc_linux_ppc64le": ["@jemalloc"], "//conditions:default": [], diff --git a/tensorflow/core/platform/default/mutex.h b/tensorflow/core/platform/default/mutex.h index a6011ec9e2..c3e44c42d9 100644 --- a/tensorflow/core/platform/default/mutex.h +++ b/tensorflow/core/platform/default/mutex.h @@ -22,6 +22,8 @@ limitations under the License. 
#include <chrono> #include <condition_variable> #include <mutex> +#include "nsync_cv.h" +#include "nsync_mu.h" #include "tensorflow/core/platform/thread_annotations.h" namespace tensorflow { @@ -29,36 +31,133 @@ namespace tensorflow { enum LinkerInitialized { LINKER_INITIALIZED }; -// A class that wraps around the std::mutex implementation, only adding an -// additional LinkerInitialized constructor interface. -class LOCKABLE mutex : public std::mutex { +// Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized +// constructor interface. This type is as fast as mutex, but is also a shared +// lock. +class LOCKABLE mutex { public: - mutex() {} - // The default implementation of std::mutex is safe to use after the linker + mutex() { nsync::nsync_mu_init(&mu_); } + // The default implementation of nsync_mutex is safe to use after the linker // initializations explicit mutex(LinkerInitialized x) {} - void lock() ACQUIRE() { std::mutex::lock(); } + void lock() EXCLUSIVE_LOCK_FUNCTION() { nsync::nsync_mu_lock(&mu_); } bool try_lock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { - return std::mutex::try_lock(); + return nsync::nsync_mu_trylock(&mu_) != 0; }; - void unlock() RELEASE() { std::mutex::unlock(); } + void unlock() UNLOCK_FUNCTION() { nsync::nsync_mu_unlock(&mu_); } + + void lock_shared() SHARED_LOCK_FUNCTION() { nsync::nsync_mu_rlock(&mu_); } + bool try_lock_shared() SHARED_TRYLOCK_FUNCTION(true) { + return nsync::nsync_mu_rtrylock(&mu_) != 0; + }; + void unlock_shared() UNLOCK_FUNCTION() { nsync::nsync_mu_runlock(&mu_); } + + private: + friend class condition_variable; + nsync::nsync_mu mu_; }; -class SCOPED_LOCKABLE mutex_lock : public std::unique_lock<std::mutex> { +// Mimic a subset of the std::unique_lock<tensorflow::mutex> functionality. 
+class SCOPED_LOCKABLE mutex_lock { public: - mutex_lock(class mutex& m) ACQUIRE(m) : std::unique_lock<std::mutex>(m) {} - mutex_lock(class mutex& m, std::try_to_lock_t t) ACQUIRE(m) - : std::unique_lock<std::mutex>(m, t) {} - mutex_lock(mutex_lock&& ml) noexcept - : std::unique_lock<std::mutex>(std::move(ml)) {} - ~mutex_lock() RELEASE() {} + typedef ::tensorflow::mutex mutex_type; + + explicit mutex_lock(mutex_type& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(&mu) { + mu_->lock(); + } + + mutex_lock(mutex_type& mu, std::try_to_lock_t) EXCLUSIVE_LOCK_FUNCTION(mu) + : mu_(&mu) { + if (!mu.try_lock()) { + mu_ = nullptr; + } + } + + // Manually nulls out the source to prevent double-free. + // (std::move does not null the source pointer by default.) + explicit mutex_lock(mutex_lock&& ml) noexcept : mu_(ml.mu_) { + ml.mu_ = nullptr; + } + ~mutex_lock() UNLOCK_FUNCTION() { + if (mu_ != nullptr) { + mu_->unlock(); + } + } + mutex_type* mutex() { return mu_; } + + operator bool() const { return mu_ != nullptr; } + + private: + mutex_type* mu_; }; // Catch bug where variable name is omitted, e.g. mutex_lock (mu); #define mutex_lock(x) static_assert(0, "mutex_lock_decl_missing_var_name"); -using std::condition_variable; +// Mimic a subset of the std::shared_lock<tensorflow::mutex> functionality. +// Name chosen to minimise conflicts with the tf_shared_lock macro, below. +class SCOPED_LOCKABLE tf_shared_lock { + public: + typedef ::tensorflow::mutex mutex_type; + + explicit tf_shared_lock(mutex_type& mu) SHARED_LOCK_FUNCTION(mu) : mu_(&mu) { + mu_->lock_shared(); + } + + tf_shared_lock(mutex_type& mu, std::try_to_lock_t) SHARED_LOCK_FUNCTION(mu) + : mu_(&mu) { + if (!mu.try_lock_shared()) { + mu_ = nullptr; + } + } + + // Manually nulls out the source to prevent double-free. + // (std::move does not null the source pointer by default.) 
+ explicit tf_shared_lock(tf_shared_lock&& ml) noexcept : mu_(ml.mu_) { + ml.mu_ = nullptr; + } + ~tf_shared_lock() UNLOCK_FUNCTION() { + if (mu_ != nullptr) { + mu_->unlock_shared(); + } + } + mutex_type* mutex() { return mu_; } + + operator bool() const { return mu_ != nullptr; } + + private: + mutex_type* mu_; +}; + +// Catch bug where variable name is omitted, e.g. tf_shared_lock (mu); +#define tf_shared_lock(x) \ + static_assert(0, "tf_shared_lock_decl_missing_var_name"); + +// Mimic std::condition_variable. +class condition_variable { + public: + condition_variable() { nsync::nsync_cv_init(&cv_); } + + void wait(mutex_lock& lock) { + nsync::nsync_cv_wait(&cv_, &lock.mutex()->mu_); + } + template <class Rep, class Period> + std::cv_status wait_for(mutex_lock& lock, + std::chrono::duration<Rep, Period> dur) { + int r = nsync::nsync_cv_wait_with_deadline( + &cv_, &lock.mutex()->mu_, std::chrono::system_clock::now() + dur, + nullptr); + return r ? std::cv_status::timeout : std::cv_status::no_timeout; + } + void notify_one() { nsync::nsync_cv_signal(&cv_); } + void notify_all() { nsync::nsync_cv_broadcast(&cv_); } + + private: + friend ConditionResult WaitForMilliseconds(mutex_lock* mu, + condition_variable* cv, int64 ms); + nsync::nsync_cv cv_; +}; inline ConditionResult WaitForMilliseconds(mutex_lock* mu, condition_variable* cv, int64 ms) { diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h index a78a3cd89b..b21779104c 100644 --- a/tensorflow/core/platform/default/notification.h +++ b/tensorflow/core/platform/default/notification.h @@ -54,8 +54,11 @@ class Notification { int64 timeout_in_us); bool WaitForNotificationWithTimeout(int64 timeout_in_us) { mutex_lock l(mu_); - return cv_.wait_for(l, std::chrono::microseconds(timeout_in_us), - [this]() { return notified_; }); + while (!notified_ && + cv_.wait_for(l, std::chrono::microseconds(timeout_in_us)) != + std::cv_status::timeout) { + } + return 
notified_; } mutex mu_; diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index 703baea1be..b6a96ed3e5 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -71,7 +71,7 @@ class CreatedContexts { public: // Returns whether context is a member of the live set. static bool Has(CUcontext context) { - shared_lock lock{mu_}; + tf_shared_lock lock{mu_}; return Live()->find(context) != Live()->end(); } diff --git a/tensorflow/stream_executor/host/host_stream.h b/tensorflow/stream_executor/host/host_stream.h index e22f49b1e6..9894d17feb 100644 --- a/tensorflow/stream_executor/host/host_stream.h +++ b/tensorflow/stream_executor/host/host_stream.h @@ -48,7 +48,7 @@ class HostStream : public internal::StreamInterface { mutex mu_; int pending_tasks_ GUARDED_BY(mu_) = 0; - ConditionVariableForMutex completion_condition_; + condition_variable completion_condition_; }; } // namespace host diff --git a/tensorflow/stream_executor/platform/default/mutex.h b/tensorflow/stream_executor/platform/default/mutex.h index ac2f123d5c..62de0cbce0 100644 --- a/tensorflow/stream_executor/platform/default/mutex.h +++ b/tensorflow/stream_executor/platform/default/mutex.h @@ -16,78 +16,24 @@ limitations under the License. #ifndef TENSORFLOW_STREAM_EXECUTOR_PLATFORM_DEFAULT_MUTEX_H_ #define TENSORFLOW_STREAM_EXECUTOR_PLATFORM_DEFAULT_MUTEX_H_ -#include <chrono> // NOLINT -#include <condition_variable> // NOLINT - -#include "tensorflow/stream_executor/platform/port.h" - -// std::shared_timed_mutex is a C++14 feature. 
-#if (__cplusplus >= 201402L) -#define STREAM_EXECUTOR_USE_SHARED_MUTEX -#endif // __cplusplus >= 201402L - -#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX -#include <shared_mutex> // NOLINT -#else -#include <mutex> // NOLINT -#endif +#include "tensorflow/stream_executor/platform/mutex.h" namespace perftools { namespace gputools { #undef mutex_lock -#undef shared_lock - -enum ConditionResult { kCond_Timeout, kCond_MaybeNotified }; - -#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX -typedef std::shared_timed_mutex BaseMutex; -typedef std::condition_variable_any ConditionVariableForMutex; -#else -typedef std::mutex BaseMutex; -typedef std::condition_variable ConditionVariableForMutex; -#endif - -// A class that wraps around the std::mutex implementation, only adding an -// additional LinkerInitialized constructor interface. -class LOCKABLE mutex : public BaseMutex { - public: - mutex() {} - // The default implementation of std::mutex is safe to use after the linker - // initializations - explicit mutex(LinkerInitialized x) {} +#undef tf_shared_lock - void lock() ACQUIRE() { BaseMutex::lock(); } - void unlock() RELEASE() { BaseMutex::unlock(); } -}; +using tensorflow::ConditionResult; +using tensorflow::WaitForMilliseconds; +using tensorflow::condition_variable; +using tensorflow::mutex; +using tensorflow::mutex_lock; +using tensorflow::tf_shared_lock; -class SCOPED_LOCKABLE mutex_lock : public std::unique_lock<BaseMutex> { - public: - mutex_lock(class mutex& m) ACQUIRE(m) : std::unique_lock<BaseMutex>(m) {} - ~mutex_lock() RELEASE() {} -}; - -// Catch bug where variable name is omitted, e.g. mutex_lock (mu); #define mutex_lock(x) static_assert(0, "mutex_lock_decl_missing_var_name"); - -#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX -// TODO(vrv): Annotate these with ACQUIRE_SHARED after implementing -// as classes. -typedef std::shared_lock<BaseMutex> shared_lock; -#else -typedef mutex_lock shared_lock; -#endif - -// Catch bug where variable name is omitted, e.g. 
shared_lock (mu); -#define shared_lock(x) static_assert(0, "shared_lock_decl_missing_var_name"); - -using std::condition_variable; - -inline ConditionResult WaitForMilliseconds(mutex_lock* mu, - ConditionVariableForMutex* cv, int64 ms) { - std::cv_status s = cv->wait_for(*mu, std::chrono::milliseconds(ms)); - return (s == std::cv_status::timeout) ? kCond_Timeout : kCond_MaybeNotified; -} +#define tf_shared_lock(x) \ + static_assert(0, "tf_shared_lock_decl_missing_var_name"); } // namespace gputools } // namespace perftools diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index e218873839..9bd4c21a66 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -1845,7 +1845,7 @@ class Stream { friend class ocl::CLBlas; // for parent_. bool InErrorState() const { - shared_lock lock{mu_}; + tf_shared_lock lock{mu_}; return !ok_; } diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc index b3eefe0299..4cac6f6c96 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.cc +++ b/tensorflow/stream_executor/stream_executor_pimpl.cc @@ -119,7 +119,7 @@ class ScopedTracer { void Trace(CallbackT callback, TraceArgsT... args) { { // Instance tracers held in a block to limit the lock lifetime. - shared_lock lock{stream_exec_->mu_}; + tf_shared_lock lock{stream_exec_->mu_}; for (TraceListener *listener : stream_exec_->listeners_) { (listener->*callback)(correlation_id_, std::forward<TraceArgsT>(args)...); @@ -229,7 +229,7 @@ void StreamExecutor::Deallocate(DeviceMemoryBase *mem) { } void StreamExecutor::GetMemAllocs(std::map<void *, AllocRecord> *records_out) { - shared_lock lock{mu_}; + tf_shared_lock lock{mu_}; *records_out = mem_allocs_; } @@ -754,7 +754,7 @@ void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&... args) { if (tracing_enabled_) { { // instance tracers held in a block to limit the lock lifetime. 
- shared_lock lock{mu_}; + tf_shared_lock lock{mu_}; for (TraceListener *listener : listeners_) { (listener->*trace_call)(std::forward<ArgsT>(args)...); } diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index ff5ca12ef8..28cb0bd61c 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -896,6 +896,7 @@ def cc_header_only_library(name, deps=[], **kwargs): def tf_custom_op_library_additional_deps(): return [ "@protobuf_archive//:protobuf_headers", + "@nsync//:nsync_headers", clean_dep("//third_party/eigen3"), clean_dep("//tensorflow/core:framework_headers_lib"), ] diff --git a/tensorflow/tf_exported_symbols.lds b/tensorflow/tf_exported_symbols.lds index 1f4d900ec2..4597d929a1 100644 --- a/tensorflow/tf_exported_symbols.lds +++ b/tensorflow/tf_exported_symbols.lds @@ -2,3 +2,4 @@ *perftools*gputools* *tf_* TF_* +*nsync_* diff --git a/tensorflow/tf_version_script.lds b/tensorflow/tf_version_script.lds index b368f7cf21..88b64eb1f0 100644 --- a/tensorflow/tf_version_script.lds +++ b/tensorflow/tf_version_script.lds @@ -3,6 +3,7 @@ tensorflow { *tensorflow*; *perftools*gputools*; TF_*; + *nsync_*; local: *; }; diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 536437df2b..494ddd2f5d 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -101,6 +101,7 @@ genrule( "@libxsmm_archive//:LICENSE", "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", + "@nsync//:LICENSE", "@png_archive//:LICENSE", "@protobuf_archive//:LICENSE", "@snappy//:COPYING", @@ -135,6 +136,7 @@ genrule( "@libxsmm_archive//:LICENSE", "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", + "@nsync//:LICENSE", "@png_archive//:LICENSE", "@protobuf_archive//:LICENSE", "@snappy//:COPYING", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index b3296a9b44..ea25805de8 100644 --- a/tensorflow/tools/pip_package/BUILD +++ 
b/tensorflow/tools/pip_package/BUILD @@ -120,6 +120,7 @@ filegroup( "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", "@grpc//third_party/nanopb:LICENSE.txt", + "@nsync//:LICENSE", "@png_archive//:LICENSE", "@protobuf_archive//:LICENSE", "@six_archive//:LICENSE", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 6e4bb3bc34..bb140946de 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -400,6 +400,15 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ) native.http_archive( + name = "nsync", + urls = [ + "https://github.com/google/nsync/archive/215217c445e27cd76c27e45960c7b4721e59a4d9.tar.gz", + ], + sha256 = "355a99d88c2ae1fb2838d75ce99b9042d547edc0133c5271d06804160091ac8a", + strip_prefix = "nsync-215217c445e27cd76c27e45960c7b4721e59a4d9", + ) + + native.http_archive( name = "com_google_googletest", urls = [ "http://mirror.bazel.build/github.com/google/googletest/archive/9816b96a6ddc0430671693df90192bbee57108b6.zip", |