-rwxr-xr-x  configure | 9
-rw-r--r--  farmhash.BUILD | 13
-rw-r--r--  tensorflow/c/c_api.h | 6
-rw-r--r--  tensorflow/contrib/BUILD | 1
-rw-r--r--  tensorflow/contrib/cmake/CMakeLists.txt | 16
-rw-r--r--  tensorflow/contrib/cmake/external/boringssl.cmake | 2
-rw-r--r--  tensorflow/contrib/cmake/external/farmhash.cmake | 55
-rw-r--r--  tensorflow/contrib/cmake/external/gif.cmake | 6
-rw-r--r--  tensorflow/contrib/cmake/external/grpc.cmake | 16
-rw-r--r--  tensorflow/contrib/cmake/external/highwayhash.cmake | 64
-rw-r--r--  tensorflow/contrib/cmake/external/jpeg.cmake | 11
-rw-r--r--  tensorflow/contrib/cmake/external/jsoncpp.cmake | 7
-rw-r--r--  tensorflow/contrib/cmake/external/png.cmake | 13
-rw-r--r--  tensorflow/contrib/cmake/external/protobuf.cmake | 20
-rw-r--r--  tensorflow/contrib/cmake/external/re2.cmake | 50
-rw-r--r--  tensorflow/contrib/cmake/patches/farmhash/CMakeLists.txt | 25
-rw-r--r--  tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt | 8
-rw-r--r--  tensorflow/contrib/cmake/patches/highwayhash/CMakeLists.txt | 49
-rw-r--r--  tensorflow/contrib/cmake/tf_cc_ops.cmake | 7
-rw-r--r--  tensorflow/contrib/cmake/tf_core_cpu.cmake | 1
-rw-r--r--  tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake | 9
-rw-r--r--  tensorflow/contrib/cmake/tf_core_framework.cmake | 31
-rw-r--r--  tensorflow/contrib/cmake/tf_models.cmake | 1
-rw-r--r--  tensorflow/contrib/cmake/tf_python.cmake | 28
-rw-r--r--  tensorflow/contrib/cmake/tf_tools.cmake | 17
-rw-r--r--  tensorflow/contrib/cmake/tf_tutorials.cmake | 1
-rw-r--r--  tensorflow/contrib/framework/BUILD | 37
-rw-r--r--  tensorflow/contrib/framework/__init__.py | 1
-rw-r--r--  tensorflow/contrib/framework/kernels/zero_initializer_op.cc | 37
-rw-r--r--  tensorflow/contrib/framework/kernels/zero_initializer_op.h | 59
-rw-r--r--  tensorflow/contrib/framework/ops/variable_ops.cc | 43
-rw-r--r--  tensorflow/contrib/framework/python/ops/variables.py | 30
-rw-r--r--  tensorflow/contrib/framework/python/ops/variables_test.py | 23
-rw-r--r--  tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj/project.pbxproj | 4
-rw-r--r--  tensorflow/contrib/ios_examples/camera/Info.plist | 2
-rw-r--r--  tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj/project.pbxproj | 4
-rw-r--r--  tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj/project.pbxproj | 4
-rw-r--r--  tensorflow/contrib/layers/python/layers/layers.py | 13
-rw-r--r--  tensorflow/contrib/layers/python/layers/layers_test.py | 23
-rw-r--r--  tensorflow/contrib/layers/python/layers/optimizers.py | 8
-rw-r--r--  tensorflow/contrib/learn/BUILD | 1
-rw-r--r--  tensorflow/contrib/makefile/Makefile | 2
-rw-r--r--  tensorflow/contrib/makefile/README.md | 6
-rwxr-xr-x  tensorflow/contrib/makefile/download_dependencies.sh | 7
-rw-r--r--  tensorflow/contrib/slim/README.md | 4
-rw-r--r--  tensorflow/core/common_runtime/simple_placer.h | 4
-rw-r--r--  tensorflow/core/kernels/conv_ops_fused.cc | 118
-rw-r--r--  tensorflow/core/kernels/conv_ops_test.cc | 79
-rw-r--r--  tensorflow/core/ops/nn_ops.cc | 34
-rw-r--r--  tensorflow/examples/tutorials/deepdream/deepdream.ipynb | 2
-rw-r--r--  tensorflow/examples/tutorials/estimators/abalone.py | 8
-rw-r--r--  tensorflow/examples/tutorials/input_fn/boston.py | 2
-rw-r--r--  tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 21
-rw-r--r--  tensorflow/g3doc/api_docs/python/client.md | 2
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md | 4
-rw-r--r--  tensorflow/g3doc/get_started/os_setup.md | 27
-rw-r--r--  tensorflow/g3doc/tutorials/image_recognition/index.md | 2
-rw-r--r--  tensorflow/g3doc/tutorials/word2vec/index.md | 2
-rw-r--r--  tensorflow/python/kernel_tests/BUILD | 2
-rw-r--r--  tensorflow/python/kernel_tests/bias_op_test.py | 16
-rw-r--r--  tensorflow/python/ops/array_ops.py | 4
-rw-r--r--  tensorflow/python/ops/nn_grad.py | 37
-rw-r--r--  tensorflow/python/ops/nn_ops.py | 48
-rw-r--r--  tensorflow/python/ops/variable_scope.py | 2
-rw-r--r--  tensorflow/python/tools/optimize_for_inference_lib.py | 52
-rw-r--r--  tensorflow/python/tools/optimize_for_inference_test.py | 33
-rwxr-xr-x  tensorflow/tools/ci_build/ci_build.sh | 6
-rwxr-xr-x  tensorflow/tools/ci_build/install/install_proto3.sh | 2
-rw-r--r--  tensorflow/tools/pip_package/setup.py | 2
-rw-r--r--  third_party/gpus/crosstool/BUILD.tpl | 10
-rw-r--r--  third_party/gpus/cuda_configure.bzl | 33
-rw-r--r--  tools/bazel.rc.template | 2
73 files changed, 1037 insertions, 315 deletions
diff --git a/configure b/configure
index feac140664..eccc204945 100755
--- a/configure
+++ b/configure
@@ -5,6 +5,11 @@ pushd `dirname $0` #> /dev/null
SOURCE_BASE_DIR=`pwd -P`
popd > /dev/null
+function bazel_clean_and_fetch() {
+ bazel clean --expunge
+ bazel fetch //tensorflow/...
+}
+
## Set up python-related environment settings
while true; do
fromuser=""
@@ -114,6 +119,7 @@ done
export TF_NEED_CUDA
if [ "$TF_NEED_CUDA" == "0" ]; then
echo "Configuration finished"
+ bazel_clean_and_fetch
exit
fi
@@ -300,7 +306,6 @@ EOF
TF_CUDA_COMPUTE_CAPABILITIES=""
done
-bazel clean --expunge
-bazel fetch //...
+bazel_clean_and_fetch
echo "Configuration finished"
diff --git a/farmhash.BUILD b/farmhash.BUILD
index 8111cd61f9..b41c799f8f 100644
--- a/farmhash.BUILD
+++ b/farmhash.BUILD
@@ -1,9 +1,22 @@
licenses(["notice"]) # MIT
+config_setting(
+ name = "windows",
+ values = {
+ "cpu": "x64_windows_msvc",
+ },
+)
+
+
cc_library(
name = "farmhash",
srcs = ["farmhash.cc"],
hdrs = ["farmhash.h"],
+ # Disable __builtin_expect support on Windows
+ copts = select({
+ ":windows" : ["/DFARMHASH_OPTIONAL_BUILTIN_EXPECT"],
+ "//conditions:default" : [],
+ }),
includes = ["."],
visibility = ["//visibility:public"],
)
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index 7aafe53b85..facdd6e233 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -582,9 +582,9 @@ typedef struct {
} TF_AttrMetadata;
// Returns metadata about the value of the attribute `attr_name` of `oper`.
-TF_AttrMetadata TF_OperationGetAttrMetadata(TF_Operation* oper,
- const char* attr_name,
- TF_Status* status);
+extern TF_AttrMetadata TF_OperationGetAttrMetadata(TF_Operation* oper,
+ const char* attr_name,
+ TF_Status* status);
// Fills in `value` with the value of the attribute `attr_name`. `value` must
// point to an array of length at least `max_length` (ideally set to
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 70a21af8f3..e388440371 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -59,6 +59,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//tensorflow/contrib/factorization:all_ops",
+ "//tensorflow/contrib/framework:all_ops",
"//tensorflow/contrib/layers:bucketization_op_op_lib",
"//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib",
"//tensorflow/contrib/metrics:set_ops_op_lib",
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 3556e143b8..4f4497618b 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -16,6 +16,9 @@ cmake_policy(SET CMP0022 NEW)
# Options
option(tensorflow_VERBOSE "Enable for verbose output" OFF)
option(tensorflow_BUILD_TESTS "Build tests" ON)
+option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
+option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON)
+option(tensorflow_BUILD_PYTHON_BINDINGS "Build the Python bindings" ON)
#Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option for
# targets that link ${CMAKE_THREAD_LIBS_INIT}.
@@ -44,10 +47,11 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
include(gif)
include(png)
include(jpeg)
-include(re2)
include(eigen)
include(jsoncpp)
-include(boringssl)
+if(tensorflow_ENABLE_SSL_SUPPORT)
+ include(boringssl)
+endif()
include(farmhash)
include(highwayhash)
include(protobuf)
@@ -64,9 +68,13 @@ include(tf_core_direct_session.cmake)
include(tf_core_distributed_runtime.cmake)
include(tf_core_kernels.cmake)
include(tf_cc_ops.cmake)
-include(tf_tutorials.cmake)
include(tf_tools.cmake)
-include(tf_python.cmake)
+if(tensorflow_BUILD_CC_EXAMPLE)
+ include(tf_tutorials.cmake)
+endif()
+if(tensorflow_BUILD_PYTHON_BINDINGS)
+ include(tf_python.cmake)
+endif()
if (tensorflow_BUILD_TESTS)
include(tests.cmake)
diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake
index 7d6245f0c3..b91c7bf383 100644
--- a/tensorflow/contrib/cmake/external/boringssl.cmake
+++ b/tensorflow/contrib/cmake/external/boringssl.cmake
@@ -27,6 +27,6 @@ ExternalProject_Add(boringssl
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
- -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+ -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
)
diff --git a/tensorflow/contrib/cmake/external/farmhash.cmake b/tensorflow/contrib/cmake/external/farmhash.cmake
index 11397fe483..a68e4cc422 100644
--- a/tensorflow/contrib/cmake/external/farmhash.cmake
+++ b/tensorflow/contrib/cmake/external/farmhash.cmake
@@ -1,38 +1,51 @@
include (ExternalProject)
set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive)
-#set(farmhash_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src)
set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip)
set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28)
set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash)
set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install)
-#set(farmhash_LIBRARIES ${farmhash_BUILD}/obj/so/libfarmhash.so)
-set(farmhash_STATIC_LIBRARIES
- ${farmhash_INSTALL}/lib/libfarmhash.a
-)
set(farmhash_INCLUDES ${farmhash_BUILD})
-
set(farmhash_HEADERS
"${farmhash_BUILD}/src/farmhash.h"
)
-ExternalProject_Add(farmhash
- PREFIX farmhash
- URL ${farmhash_URL}
- URL_HASH ${farmhash_HASH}
- DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
- BUILD_COMMAND $(MAKE)
- INSTALL_COMMAND $(MAKE) install
- CONFIGURE_COMMAND
- ${farmhash_BUILD}/configure
- --prefix=${farmhash_INSTALL}
- --enable-shared=yes
- CXXFLAGS=-fPIC
-)
+if(WIN32)
+ set(farmhash_STATIC_LIBRARIES ${farmhash_INSTALL}/lib/farmhash.lib)
+
+ ExternalProject_Add(farmhash
+ PREFIX farmhash
+ URL ${farmhash_URL}
+ URL_HASH ${farmhash_HASH}
+ DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+ BUILD_IN_SOURCE 1
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
+ INSTALL_DIR ${farmhash_INSTALL}
+ CMAKE_CACHE_ARGS
+ -DCMAKE_BUILD_TYPE:STRING=Release
+ -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+ -DCMAKE_INSTALL_PREFIX:STRING=${farmhash_INSTALL})
+else()
+ set(farmhash_STATIC_LIBRARIES ${farmhash_INSTALL}/lib/libfarmhash.a)
+
+ ExternalProject_Add(farmhash
+ PREFIX farmhash
+ URL ${farmhash_URL}
+ URL_HASH ${farmhash_HASH}
+ DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+ BUILD_COMMAND $(MAKE)
+ INSTALL_COMMAND $(MAKE) install
+ CONFIGURE_COMMAND
+ ${farmhash_BUILD}/configure
+ --prefix=${farmhash_INSTALL}
+ --enable-shared=yes
+ CXXFLAGS=-fPIC)
+
+endif()
# put farmhash includes in the directory where they are expected
add_custom_target(farmhash_create_destination_dir
- COMMAND ${CMAKE_COMMAND} -E make_directory ${farmhash_INCLUDE_DIR}/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/src
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${farmhash_INCLUDE_DIR}
DEPENDS farmhash)
add_custom_target(farmhash_copy_headers_to_destination
@@ -40,5 +53,5 @@ add_custom_target(farmhash_copy_headers_to_destination
foreach(header_file ${farmhash_HEADERS})
add_custom_command(TARGET farmhash_copy_headers_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${farmhash_INCLUDE_DIR}/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/src)
+ COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${farmhash_INCLUDE_DIR}/)
endforeach()
diff --git a/tensorflow/contrib/cmake/external/gif.cmake b/tensorflow/contrib/cmake/external/gif.cmake
index 32c6369067..021c2d676e 100644
--- a/tensorflow/contrib/cmake/external/gif.cmake
+++ b/tensorflow/contrib/cmake/external/gif.cmake
@@ -1,6 +1,6 @@
include (ExternalProject)
-set(gif_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/gif_archive)
+set(gif_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/gif_archive/giflib-5.1.4/)
set(gif_URL http://ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz)
set(gif_HASH SHA256=34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1)
set(gif_INSTALL ${CMAKE_BINARY_DIR}/gif/install)
@@ -29,7 +29,7 @@ ExternalProject_Add(gif
# put gif includes in the directory where they are expected
add_custom_target(gif_create_destination_dir
- COMMAND ${CMAKE_COMMAND} -E make_directory ${gif_INCLUDE_DIR}/giflib-5.1.4/lib
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${gif_INCLUDE_DIR}
DEPENDS gif)
add_custom_target(gif_copy_headers_to_destination
@@ -37,5 +37,5 @@ add_custom_target(gif_copy_headers_to_destination
foreach(header_file ${gif_HEADERS})
add_custom_command(TARGET gif_copy_headers_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${gif_INCLUDE_DIR}/giflib-5.1.4/lib/)
+ COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${gif_INCLUDE_DIR}/)
endforeach()
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
index 30ddcad6d7..6684c05142 100644
--- a/tensorflow/contrib/cmake/external/grpc.cmake
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -4,11 +4,19 @@ set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include)
set(GRPC_URL https://github.com/grpc/grpc.git)
set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc)
set(GRPC_TAG 3bc78cd0b5bd784a235c01612d634b1ec5f8fb97)
-set(GRPC_LIBRARIES
- ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a
- ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a
- ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a)
+if(WIN32)
+ set(GRPC_LIBRARIES
+ ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/${CMAKE_BUILD_TYPE}/grpc++_unsecure.lib
+ ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/${CMAKE_BUILD_TYPE}/grpc_unsecure.lib
+ ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/${CMAKE_BUILD_TYPE}/gpr.lib)
+else()
+ set(GRPC_LIBRARIES
+ ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a
+ ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a
+ ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a)
+endif()
+
ExternalProject_Add(grpc
PREFIX grpc
DEPENDS protobuf
diff --git a/tensorflow/contrib/cmake/external/highwayhash.cmake b/tensorflow/contrib/cmake/external/highwayhash.cmake
index fec44c2898..7de67daee9 100644
--- a/tensorflow/contrib/cmake/external/highwayhash.cmake
+++ b/tensorflow/contrib/cmake/external/highwayhash.cmake
@@ -1,31 +1,10 @@
include (ExternalProject)
set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash)
-#set(highwayhash_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src)
set(highwayhash_URL https://github.com/google/highwayhash.git)
set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee)
set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash)
set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install)
-#set(highwayhash_LIBRARIES ${highwayhash_BUILD}/obj/so/libhighwayhash.so)
-set(highwayhash_STATIC_LIBRARIES
- ${highwayhash_INSTALL}/lib/libhighwayhash.a
-)
-set(highwayhash_INCLUDES ${highwayhash_BUILD})
-
-set(highwayhash_HEADERS
- "${highwayhash_BUILD}/highwayhash/*.h"
-)
-
-ExternalProject_Add(highwayhash
- PREFIX highwayhash
- GIT_REPOSITORY ${highwayhash_URL}
- GIT_TAG ${highwayhash_TAG}
- DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
- BUILD_IN_SOURCE 1
- BUILD_COMMAND $(MAKE)
- CONFIGURE_COMMAND ""
- INSTALL_COMMAND ""
-)
# put highwayhash includes in the directory where they are expected
add_custom_target(highwayhash_create_destination_dir
@@ -35,7 +14,44 @@ add_custom_target(highwayhash_create_destination_dir
add_custom_target(highwayhash_copy_headers_to_destination
DEPENDS highwayhash_create_destination_dir)
-foreach(header_file ${highwayhash_HEADERS})
+if(WIN32)
+ set(highwayhash_HEADERS "${highwayhash_BUILD}/highwayhash/*.h")
+ set(highwayhash_STATIC_LIBRARIES ${highwayhash_INSTALL}/lib/highwayhash.lib)
+
+ ExternalProject_Add(highwayhash
+ PREFIX highwayhash
+ GIT_REPOSITORY ${highwayhash_URL}
+ GIT_TAG ${highwayhash_TAG}
+ DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+ BUILD_IN_SOURCE 1
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
+ INSTALL_DIR ${highwayhash_INSTALL}
+ CMAKE_CACHE_ARGS
+ -DCMAKE_BUILD_TYPE:STRING=Release
+ -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+ -DCMAKE_INSTALL_PREFIX:STRING=${highwayhash_INSTALL})
+
+ add_custom_command(TARGET highwayhash_copy_headers_to_destination PRE_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${highwayhash_INSTALL}/include/ ${highwayhash_INCLUDE_DIR}/highwayhash)
+
+else()
+
+ set(highwayhash_HEADERS "${highwayhash_BUILD}/highwayhash/*.h")
+ set(highwayhash_STATIC_LIBRARIES ${highwayhash_INSTALL}/lib/libhighwayhash.a)
+
+ ExternalProject_Add(highwayhash
+ PREFIX highwayhash
+ GIT_REPOSITORY ${highwayhash_URL}
+ GIT_TAG ${highwayhash_TAG}
+ DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+ BUILD_IN_SOURCE 1
+ BUILD_COMMAND $(MAKE)
+ CONFIGURE_COMMAND ""
+ INSTALL_COMMAND "")
+
+ foreach(header_file ${highwayhash_HEADERS})
add_custom_command(TARGET highwayhash_copy_headers_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${highwayhash_INCLUDE_DIR}/highwayhash)
-endforeach()
+ COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${highwayhash_INCLUDE_DIR}/highwayhash)
+ endforeach()
+
+endif()
diff --git a/tensorflow/contrib/cmake/external/jpeg.cmake b/tensorflow/contrib/cmake/external/jpeg.cmake
index edef25a35b..a94eb65ddb 100644
--- a/tensorflow/contrib/cmake/external/jpeg.cmake
+++ b/tensorflow/contrib/cmake/external/jpeg.cmake
@@ -5,7 +5,12 @@ set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz)
set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7)
set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg)
set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install)
-set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.a)
+
+if(WIN32)
+ set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib)
+else()
+ set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.a)
+endif()
set(jpeg_HEADERS
"${jpeg_INSTALL}/include/jconfig.h"
@@ -63,7 +68,7 @@ endif()
# put jpeg includes in the directory where they are expected
add_custom_target(jpeg_create_destination_dir
- COMMAND ${CMAKE_COMMAND} -E make_directory ${jpeg_INCLUDE_DIR}/jpeg-9a
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${jpeg_INCLUDE_DIR}
DEPENDS jpeg)
add_custom_target(jpeg_copy_headers_to_destination
@@ -71,5 +76,5 @@ add_custom_target(jpeg_copy_headers_to_destination
foreach(header_file ${jpeg_HEADERS})
add_custom_command(TARGET jpeg_copy_headers_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${jpeg_INCLUDE_DIR}/jpeg-9a)
+ COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${jpeg_INCLUDE_DIR})
endforeach()
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index 22d8139b45..75d5d72703 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -6,9 +6,14 @@ set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git)
set(jsoncpp_TAG 4356d9b)
set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so)
-get_filename_component(jsoncpp_STATIC_LIBRARIES ${jsoncpp_BUILD}/libjsoncpp.a ABSOLUTE)
set(jsoncpp_INCLUDES ${jsoncpp_BUILD})
+if(WIN32)
+ set(jsoncpp_STATIC_LIBRARIES ${jsoncpp_BUILD}/${CMAKE_BUILD_TYPE}/jsoncpp.lib)
+else()
+ set(jsoncpp_STATIC_LIBRARIES ${jsoncpp_BUILD}/libjsoncpp.a)
+endif()
+
# We only need jsoncpp.h in external/jsoncpp/jsoncpp/jsoncpp.h
# For the rest, we'll just add the build dir as an include dir.
set(jsoncpp_HEADERS
diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake
index 56d6ae7050..2be5aa70af 100644
--- a/tensorflow/contrib/cmake/external/png.cmake
+++ b/tensorflow/contrib/cmake/external/png.cmake
@@ -5,7 +5,12 @@ set(png_URL https://storage.googleapis.com/libpng-public-archive/libpng-1.2.53.t
set(png_HASH SHA256=e05c9056d7f323088fd7824d8c6acc03a4a758c4b4916715924edc5dd3223a72)
set(png_BUILD ${CMAKE_BINARY_DIR}/png/src/png)
set(png_INSTALL ${CMAKE_BINARY_DIR}/png/install)
-set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12.a)
+
+if(WIN32)
+ set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_static.lib)
+else()
+ set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12.a)
+endif()
set(png_HEADERS
"${png_INSTALL}/include/libpng12/png.h"
@@ -27,13 +32,13 @@ ExternalProject_Add(png
## put png includes in the directory where they are expected
add_custom_target(png_create_destination_dir
- COMMAND ${CMAKE_COMMAND} -E make_directory ${png_INCLUDE_DIR}/libpng-1.2.53
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${png_INCLUDE_DIR}
DEPENDS png)
add_custom_target(png_copy_headers_to_destination
DEPENDS png_create_destination_dir)
foreach(header_file ${png_HEADERS})
- add_custom_command(TARGET png_copy_headers_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${png_INCLUDE_DIR}/libpng-1.2.53)
+ add_custom_command(TARGET png_copy_headers_to_destination PRE_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${png_INCLUDE_DIR}/)
endforeach()
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index dc74e9a338..abde06ad31 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -1,10 +1,16 @@
include (ExternalProject)
set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
-set(PROTOBUF_URL https://github.com/google/protobuf/releases/download/v3.0.0/protobuf-cpp-3.0.0.zip)
-set(PROTOBUF_HASH SHA256=e886ea7d08267fc3d866ac42d6dd7461ae11c491836adef6f34c04cad0be3078)
-set(PROTOBUF_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/libprotobuf.a)
-set(PROTOBUF_PROTOC_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/protoc)
+set(PROTOBUF_URL https://github.com/google/protobuf/releases/download/v3.1.0/protobuf-cpp-3.1.0.zip)
+set(PROTOBUF_HASH SHA256=0c18ccc99e921c407f359047f9b56cca196c3ab36eed79e5979df6c1f9e623b7)
+
+if(WIN32)
+ set(PROTOBUF_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/${CMAKE_BUILD_TYPE}/libprotobuf.lib)
+ set(PROTOBUF_PROTOC_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/${CMAKE_BUILD_TYPE}/protoc.exe)
+else()
+ set(PROTOBUF_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/libprotobuf.a)
+ set(PROTOBUF_PROTOC_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/protoc)
+endif()
ExternalProject_Add(protobuf
PREFIX protobuf
@@ -12,11 +18,11 @@ ExternalProject_Add(protobuf
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf
- CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/ -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+ CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/ -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF
INSTALL_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
- -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+ -Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF
+ -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
)
-
diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
deleted file mode 100644
index 1da2ff6be2..0000000000
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ /dev/null
@@ -1,50 +0,0 @@
-include (ExternalProject)
-
-set(re2_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/re2/re2
- ${CMAKE_CURRENT_BINARY_DIR}/external/re2)
-set(re2_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/re2/src/re2)
-set(re2_URL https://github.com/google/re2.git)
-set(re2_TAG 791beff)
-set(re2_BUILD ${CMAKE_BINARY_DIR}/re2/src/re2)
-set(re2_LIBRARIES ${re2_BUILD}/obj/so/libre2.so)
-get_filename_component(re2_STATIC_LIBRARIES ${re2_BUILD}/libre2.a ABSOLUTE)
-set(re2_INCLUDES ${re2_BUILD})
-
-# We only need re2.h in external/re2/re2/re2.h
-# For the rest, we'll just add the build dir as an include dir.
-set(re2_HEADERS
- "${re2_BUILD}/re2/re2.h"
- "${re2_BUILD}/re2/stringpiece.h"
- "${re2_BUILD}/re2/variadic_function.h"
-)
-
-ExternalProject_Add(re2
- PREFIX re2
- GIT_REPOSITORY ${re2_URL}
- GIT_TAG ${re2_TAG}
- DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
- BUILD_IN_SOURCE 1
- INSTALL_COMMAND ""
- CMAKE_CACHE_ARGS
- -DCMAKE_BUILD_TYPE:STRING=Release
- -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
- -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-)
-
-## put re2 includes in the directory where they are expected
-add_custom_target(re2_create_destination_dir
- COMMAND ${CMAKE_COMMAND} -E make_directory ${re2_INCLUDE_DIR}/re2
- DEPENDS re2)
-
-add_custom_target(re2_copy_headers_to_destination
- DEPENDS re2_create_destination_dir)
-
-foreach(header_file ${re2_HEADERS})
- add_custom_command(TARGET re2_copy_headers_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${re2_INCLUDE_DIR}/re2)
-endforeach()
-
-ADD_LIBRARY(re2_lib STATIC IMPORTED
- DEPENDS re2)
-SET_TARGET_PROPERTIES(re2_lib PROPERTIES
- IMPORTED_LOCATION ${re2_STATIC_LIBRARIES})
diff --git a/tensorflow/contrib/cmake/patches/farmhash/CMakeLists.txt b/tensorflow/contrib/cmake/patches/farmhash/CMakeLists.txt
new file mode 100644
index 0000000000..0286f29ad0
--- /dev/null
+++ b/tensorflow/contrib/cmake/patches/farmhash/CMakeLists.txt
@@ -0,0 +1,25 @@
+cmake_minimum_required(VERSION 2.8.3)
+
+project(farmhash)
+
+set(FARMHASH_SRCS
+ "src/farmhash.h"
+ "src/farmhash.cc"
+)
+
+set(FARMHASH_INCLUDES
+ "src/farmhash.h"
+)
+
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
+
+add_library(farmhash ${FARMHASH_SRCS})
+add_definitions(-DFARMHASH_NO_BUILTIN_EXPECT)
+
+install(TARGETS farmhash
+ LIBRARY DESTINATION lib COMPONENT RuntimeLibraries
+ ARCHIVE DESTINATION lib COMPONENT Development)
+
+foreach(FARMHASH_INCLUDE ${FARMHASH_INCLUDES})
+ install(FILES ${FARMHASH_INCLUDE} DESTINATION include COMPONENT Development)
+endforeach()
diff --git a/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt b/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt
index 6cc2e4e375..ce8a0cb72c 100644
--- a/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt
@@ -47,6 +47,11 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+if(WIN32)
+ add_definitions(-D_WIN32_WINNT=0x0A00)
+ find_package(ZLIB REQUIRED)
+endif(WIN32)
+
add_library(gpr
src/core/lib/profiling/basic_timers.c
src/core/lib/profiling/stap_timers.c
@@ -99,6 +104,7 @@ target_include_directories(gpr
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${PROTOBUF_INCLUDE_DIRS}
+ PRIVATE ${ZLIB_INCLUDE_DIRS}
)
add_library(grpc_unsecure
@@ -265,6 +271,7 @@ target_include_directories(grpc_unsecure
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${PROTOBUF_ROOT_DIR}/src
+ PRIVATE ${ZLIB_INCLUDE_DIRS}
)
target_link_libraries(grpc_unsecure
@@ -306,6 +313,7 @@ target_include_directories(grpc++_unsecure
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${PROTOBUF_INCLUDE_DIRS}
+ PRIVATE ${ZLIB_INCLUDE_DIRS}
)
target_link_libraries(grpc++_unsecure
diff --git a/tensorflow/contrib/cmake/patches/highwayhash/CMakeLists.txt b/tensorflow/contrib/cmake/patches/highwayhash/CMakeLists.txt
new file mode 100644
index 0000000000..94bb62f95f
--- /dev/null
+++ b/tensorflow/contrib/cmake/patches/highwayhash/CMakeLists.txt
@@ -0,0 +1,49 @@
+cmake_minimum_required(VERSION 2.8.3)
+
+project(highwayhash)
+
+set(HIGHWAYHASH_SRCS
+ "highwayhash/code_annotation.h"
+ "highwayhash/highway_tree_hash.cc"
+ "highwayhash/highway_tree_hash.h"
+ "highwayhash/scalar_highway_tree_hash.cc"
+ "highwayhash/scalar_highway_tree_hash.h"
+ "highwayhash/scalar_sip_tree_hash.cc"
+ "highwayhash/scalar_sip_tree_hash.h"
+ "highwayhash/sip_hash.cc"
+ "highwayhash/sip_hash.h"
+ "highwayhash/sip_tree_hash.cc"
+ "highwayhash/sip_tree_hash.h"
+ "highwayhash/sse41_highway_tree_hash.cc"
+ "highwayhash/sse41_highway_tree_hash.h"
+ "highwayhash/state_helpers.h"
+ "highwayhash/types.h"
+ "highwayhash/vec.h"
+ "highwayhash/vec2.h"
+)
+
+set(HIGHWAYHASH_INCLUDES
+ "highwayhash/code_annotation.h"
+ "highwayhash/highway_tree_hash.h"
+ "highwayhash/scalar_highway_tree_hash.h"
+ "highwayhash/scalar_sip_tree_hash.h"
+ "highwayhash/sip_hash.h"
+ "highwayhash/sip_tree_hash.h"
+ "highwayhash/sse41_highway_tree_hash.h"
+ "highwayhash/state_helpers.h"
+ "highwayhash/types.h"
+ "highwayhash/vec.h"
+ "highwayhash/vec2.h"
+)
+
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
+
+add_library(highwayhash ${HIGHWAYHASH_SRCS})
+
+install(TARGETS highwayhash
+ LIBRARY DESTINATION lib COMPONENT RuntimeLibraries
+ ARCHIVE DESTINATION lib COMPONENT Development)
+
+foreach(HIGHWAYHASH_INCLUDE ${HIGHWAYHASH_INCLUDES})
+ install(FILES ${HIGHWAYHASH_INCLUDE} DESTINATION include COMPONENT Development)
+endforeach()
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index 42eeef39e2..b5c33d66ee 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -148,7 +148,6 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names})
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_LIBRARIES}
tf_protos_cc
- re2_lib
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
${png_STATIC_LIBRARIES}
@@ -157,7 +156,11 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names})
${boringssl_STATIC_LIBRARIES}
${CMAKE_DL_LIBS}
)
-
+ if(tensorflow_ENABLE_SSL_SUPPORT)
+ target_link_libraries(${tf_cc_op_lib_name}_gen_cc PRIVATE
+ ${boringssl_STATIC_LIBRARIES})
+ endif()
+
target_compile_options(${tf_cc_op_lib_name}_gen_cc PRIVATE
-fno-exceptions
-DEIGEN_AVOID_STL_ARRAY
diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake
index 135c001536..58635a5266 100644
--- a/tensorflow/contrib/cmake/tf_core_cpu.cmake
+++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake
@@ -34,7 +34,6 @@ add_library(tf_core_cpu OBJECT ${tf_core_cpu_srcs})
target_include_directories(tf_core_cpu PRIVATE
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
- ${re2_INCLUDES}
)
add_dependencies(tf_core_cpu
diff --git a/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake b/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
index d52b18a85e..e1f8bdd609 100644
--- a/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
+++ b/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
@@ -18,14 +18,12 @@ add_library(tf_core_distributed_runtime OBJECT ${tf_core_distributed_runtime_src
add_dependencies(tf_core_distributed_runtime
tf_core_cpu grpc
- re2_copy_headers_to_destination
)
target_include_directories(tf_core_distributed_runtime PRIVATE
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
${GRPC_INCLUDE_DIRS}
- ${re2_INCLUDE_DIR}
)
target_compile_options(tf_core_distributed_runtime PRIVATE
@@ -60,7 +58,6 @@ add_executable(grpc_tensorflow_server
add_dependencies(tf_core_distributed_runtime
grpc
- re2_copy_headers_to_destination
)
target_include_directories(grpc_tensorflow_server PUBLIC
@@ -76,8 +73,6 @@ target_link_libraries(grpc_tensorflow_server PUBLIC
${PROTOBUF_LIBRARIES}
${GRPC_LIBRARIES}
tf_protos_cc
- re2_lib
- ${boringssl_STATIC_LIBRARIES}
${farmhash_STATIC_LIBRARIES}
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
@@ -86,6 +81,10 @@ target_link_libraries(grpc_tensorflow_server PUBLIC
${ZLIB_LIBRARIES}
${CMAKE_DL_LIBS}
)
+if(tensorflow_ENABLE_SSL_SUPPORT)
+ target_link_libraries(grpc_tensorflow_server PUBLIC
+ ${boringssl_STATIC_LIBRARIES})
+endif()
target_compile_options(grpc_tensorflow_server PRIVATE
-fno-exceptions
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index f38697097c..cad3b7864d 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -146,6 +146,14 @@ file(GLOB_RECURSE tf_core_lib_test_srcs
list(REMOVE_ITEM tf_core_lib_srcs ${tf_core_lib_test_srcs})
+if(NOT tensorflow_ENABLE_SSL_SUPPORT)
+ file(GLOB_RECURSE tf_core_lib_cloud_srcs
+ "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.h"
+ "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.cc"
+ )
+ list(REMOVE_ITEM tf_core_lib_srcs ${tf_core_lib_cloud_srcs})
+endif()
+
add_library(tf_core_lib OBJECT ${tf_core_lib_srcs})
target_include_directories(tf_core_lib PUBLIC
${tensorflow_source_dir}
@@ -153,9 +161,7 @@ target_include_directories(tf_core_lib PUBLIC
${jpeg_INCLUDE_DIR}
${png_INCLUDE_DIR}
${eigen_INCLUDE_DIRS}
- ${re2_EXTRA_INCLUDE_DIR}
${jsoncpp_INCLUDE_DIR}
- ${boringssl_INCLUDE_DIR}
)
target_compile_options(tf_core_lib PRIVATE
-fno-exceptions
@@ -171,12 +177,16 @@ add_dependencies(tf_core_lib
gif_copy_headers_to_destination
jpeg_copy_headers_to_destination
png_copy_headers_to_destination
- re2_copy_headers_to_destination
eigen
tf_protos_cc
jsoncpp
- boringssl
-)
+ )
+
+if(tensorflow_ENABLE_SSL_SUPPORT)
+ target_include_directories(tf_core_lib PUBLIC ${boringssl_INCLUDE_DIR})
+ add_dependencies(tf_core_lib boringssl)
+endif()
+
# Tricky setup to force always rebuilding
# force_rebuild always runs forcing ${VERSION_INFO_CC} target to run
@@ -230,18 +240,7 @@ add_library(tf_core_framework OBJECT
target_include_directories(tf_core_framework PUBLIC
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
- ${re2_INCLUDES}
)
-#target_link_libraries(tf_core_framework
-# ${CMAKE_THREAD_LIBS_INIT}
-# ${PROTOBUF_LIBRARIES}
-# #${re2_STATIC_LIBRARIES}
-# re2_lib
-# ${jpeg_STATIC_LIBRARIES}
-# ${png_STATIC_LIBRARIES}
-# tf_protos_cc
-# tf_core_lib
-#)
add_dependencies(tf_core_framework
tf_core_lib
proto_text
diff --git a/tensorflow/contrib/cmake/tf_models.cmake b/tensorflow/contrib/cmake/tf_models.cmake
index 940492771a..9ba7608551 100644
--- a/tensorflow/contrib/cmake/tf_models.cmake
+++ b/tensorflow/contrib/cmake/tf_models.cmake
@@ -68,7 +68,6 @@ add_library(tf_models_word2vec_kernels OBJECT ${tf_models_word2vec_kernels_srcs}
target_include_directories(tf_models_word2vec_kernels PRIVATE
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
- ${re2_INCLUDES}
)
add_dependencies(tf_models_word2vec_kernels
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 67f45082f7..b31ef233cf 100644
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -226,13 +226,11 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_LIBRARIES}
tf_protos_cc
- re2_lib
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
${png_STATIC_LIBRARIES}
${ZLIB_LIBRARIES}
${jsoncpp_STATIC_LIBRARIES}
- ${boringssl_STATIC_LIBRARIES}
${CMAKE_DL_LIBS}
)
target_compile_options(${tf_python_op_lib_name}_gen_python PRIVATE
@@ -244,7 +242,11 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
target_compile_features(${tf_python_op_lib_name}_gen_python PRIVATE
cxx_rvalue_references
)
-
+ if(tensorflow_ENABLE_SSL_SUPPORT)
+ target_link_libraries(${tf_python_op_lib_name}_gen_python PRIVATE
+ ${boringssl_STATIC_LIBRARIES})
+ endif()
+
# Use the generated C++ executable to create a Python file
# containing the wrappers.
add_custom_command(
@@ -346,8 +348,6 @@ target_link_libraries(pywrap_tensorflow
tf_protos_cc
${GRPC_LIBRARIES}
${PROTOBUF_LIBRARY}
- re2_lib
- ${boringssl_STATIC_LIBRARIES}
${farmhash_STATIC_LIBRARIES}
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
@@ -367,29 +367,33 @@ target_include_directories(pywrap_tensorflow PUBLIC
target_compile_features(pywrap_tensorflow PRIVATE
cxx_rvalue_references
)
+if(tensorflow_ENABLE_SSL_SUPPORT)
+ target_link_libraries(pywrap_tensorflow ${boringssl_STATIC_LIBRARIES})
+endif()
+
############################################################
# Build a PIP package containing the TensorFlow runtime.
############################################################
-add_custom_target(tf_python_copy_pip_files)
-add_dependencies(tf_python_copy_pip_files
+add_custom_target(tf_python_build_pip_package)
+add_dependencies(tf_python_build_pip_package
pywrap_tensorflow
tf_python_copy_scripts_to_destination
tf_python_touchup_modules
tf_python_ops)
-add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
+add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/cmake/setup.py
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
-add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
+add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow.so
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow.so)
-add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
+add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
-add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
+add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/MANIFEST.in
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
-add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
+add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/setup.py bdist_wheel
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tf_python)
diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake
index f8bbfb348d..87c53502f8 100644
--- a/tensorflow/contrib/cmake/tf_tools.cmake
+++ b/tensorflow/contrib/cmake/tf_tools.cmake
@@ -17,30 +17,25 @@ target_include_directories(${proto_text} PUBLIC
${tensorflow_source_dir}
)
+# TODO(mrry): Cut down the dependencies of this tool.
target_link_libraries(${proto_text} PUBLIC
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_LIBRARIES}
- # tf_protos_cc
- # re2_lib
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
${png_STATIC_LIBRARIES}
${ZLIB_LIBRARIES}
${jsoncpp_STATIC_LIBRARIES}
- ${boringssl_STATIC_LIBRARIES}
${CMAKE_DL_LIBS}
-)
+ )
+if(tensorflow_ENABLE_SSL_SUPPORT)
+ target_link_libraries(${proto_text} PUBLIC ${boringssl_STATIC_LIBRARIES})
+endif()
+
add_dependencies(${proto_text}
tf_core_lib
protobuf
- # jpeg_copy_headers_to_destination
- # png_copy_headers_to_destination
- # re2_copy_headers_to_destination
- # eigen
- # tf_protos_cc
- # jsoncpp
- # boringssl
)
target_compile_options(${proto_text} PRIVATE
diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake
index 7f18491d3f..ec45ac4ff8 100644
--- a/tensorflow/contrib/cmake/tf_tutorials.cmake
+++ b/tensorflow/contrib/cmake/tf_tutorials.cmake
@@ -38,7 +38,6 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_STATIC_LIBRARIES}
tf_protos_cc
- re2_lib
${boringssl_STATIC_LIBRARIES}
${farmhash_STATIC_LIBRARIES}
${gif_STATIC_LIBRARIES}
diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD
index db8d10748e..ca625eccb3 100644
--- a/tensorflow/contrib/framework/BUILD
+++ b/tensorflow/contrib/framework/BUILD
@@ -8,6 +8,9 @@ exports_files(["LICENSE"])
package(default_visibility = ["//tensorflow:__subpackages__"])
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
py_library(
name = "framework_py",
@@ -23,7 +26,41 @@ py_library(
"python/ops/prettyprint_ops.py",
"python/ops/variables.py",
],
+ data = [
+ ":python/ops/_variable_ops.so",
+ ],
srcs_version = "PY2AND3",
+ deps = [
+ ":gen_variable_ops",
+ ],
+)
+
+tf_custom_op_library(
+ name = "python/ops/_variable_ops.so",
+ srcs = [
+ "kernels/zero_initializer_op.cc",
+ "kernels/zero_initializer_op.h",
+ "ops/variable_ops.cc",
+ ],
+)
+
+tf_gen_op_libs(
+ op_lib_names = ["variable_ops"],
+)
+
+cc_library(
+ name = "all_ops",
+ deps = [
+ ":variable_ops_op_lib",
+ ],
+)
+
+tf_gen_op_wrapper_py(
+ name = "gen_variable_ops",
+ out = "python/ops/gen_variable_ops.py",
+ deps = [
+ ":variable_ops_op_lib",
+ ],
)
py_test(
diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py
index 5bc0e563ec..9414fffda4 100644
--- a/tensorflow/contrib/framework/__init__.py
+++ b/tensorflow/contrib/framework/__init__.py
@@ -60,6 +60,7 @@
@@model_variable
@@variable
@@VariableDeviceChooser
+@@zero_initializer
"""
from __future__ import absolute_import
diff --git a/tensorflow/contrib/framework/kernels/zero_initializer_op.cc b/tensorflow/contrib/framework/kernels/zero_initializer_op.cc
new file mode 100644
index 0000000000..8a37078059
--- /dev/null
+++ b/tensorflow/contrib/framework/kernels/zero_initializer_op.cc
@@ -0,0 +1,37 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/framework/kernels/zero_initializer_op.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+#define REGISTER_KERNELS(D, T) \
+ REGISTER_KERNEL_BUILDER(Name("ZeroInitializer") \
+ .Device(DEVICE_##D) \
+ .TypeConstraint<T>("T"), \
+ ZeroInitializerOp<T>);
+#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
+#undef REGISTER_CPU_KERNELS
+
+#if GOOGLE_CUDA
+#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+#undef REGISTER_GPU_KERNELS
+#endif // GOOGLE_CUDA
+
+#undef REGISTER_KERNELS
+} // namespace tensorflow
diff --git a/tensorflow/contrib/framework/kernels/zero_initializer_op.h b/tensorflow/contrib/framework/kernels/zero_initializer_op.h
new file mode 100644
index 0000000000..dea40ede90
--- /dev/null
+++ b/tensorflow/contrib/framework/kernels/zero_initializer_op.h
@@ -0,0 +1,59 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
+#define TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+
+template <typename T>
+class ZeroInitializerOp : public OpKernel {
+ public:
+ explicit ZeroInitializerOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+ OP_REQUIRES(ctx, IsRefType(ctx->input_type(0)),
+ errors::InvalidArgument("input needs to be a ref type"));
+ }
+ void Compute(OpKernelContext* ctx) override {
+ mutex_lock l(*ctx->input_ref_mutex(0));
+ Tensor input = ctx->mutable_input(0, true);
+ OP_REQUIRES(ctx, !input.IsInitialized(),
+ errors::InvalidArgument("input is already initialized"));
+ AllocatorAttributes attr;
+ attr.set_gpu_compatible(true);
+ attr.set_nic_compatible(true);
+ PersistentTensor out_persistent;
+ Tensor* out_tensor = nullptr;
+ OP_REQUIRES_OK(ctx, ctx->allocate_persistent(
+ input.dtype(), input.shape(), &out_persistent,
+ &out_tensor, attr));
+ auto out_tensor_flat = out_tensor->flat<T>();
+ int total_size = static_cast<int>(1);
+ for (int d = static_cast<int>(0); d < out_tensor->dims(); d++) {
+ total_size *= out_tensor->dim_size(d);
+ }
+ for (int idx = static_cast<int>(0); idx < total_size; idx++) {
+ out_tensor_flat(idx) = static_cast<T>(0);
+ }
+ ctx->replace_ref_input(0, *out_tensor, true);
+ // we always return the input ref.
+ ctx->forward_ref_input_to_ref_output(0, 0);
+ }
+};
+
+} // end namespace tensorflow
+#endif // TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
diff --git a/tensorflow/contrib/framework/ops/variable_ops.cc b/tensorflow/contrib/framework/ops/variable_ops.cc
new file mode 100644
index 0000000000..8f909f8ba7
--- /dev/null
+++ b/tensorflow/contrib/framework/ops/variable_ops.cc
@@ -0,0 +1,43 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::Shape;
+
+REGISTER_OP("ZeroInitializer")
+ .Input("ref: Ref(T)")
+ .Output("output_ref: Ref(T)")
+ .Attr("T: realnumbertype")
+ .SetAllowsUninitializedInput()
+ .SetShapeFn([](InferenceContext* c) {
+ c->set_output(0, c->input(0));
+ return Status::OK();
+ })
+ .Doc(R"doc(
+Initialize 'ref' with all zeros. This op requires that the tensor is not
+initialized. Memory is first allocated for the tensor, which is then filled
+with zeros. This op is intended to save memory during initialization;
+if you use this op, you should not run the initializer of the 'ref' tensor.
+
+ref: Should be from a `Variable` node.
+output_ref:= Same as "ref".
+)doc");
+
+} // namespace tensorflow
diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py
index e006add43a..2475a2fb21 100644
--- a/tensorflow/contrib/framework/python/ops/variables.py
+++ b/tensorflow/contrib/framework/python/ops/variables.py
@@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
from tensorflow.contrib.framework.python.ops import add_arg_scope as contrib_add_arg_scope
+from tensorflow.contrib.framework.python.ops import gen_variable_ops
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.framework import device as tf_device
from tensorflow.python.framework import dtypes
@@ -29,8 +30,11 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
+from tensorflow.python.ops import gen_state_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import saver as tf_saver
+from tensorflow.python.framework.load_library import load_op_library
+from tensorflow.python.platform import resource_loader
__all__ = ['add_model_variable',
@@ -53,9 +57,33 @@ __all__ = ['add_model_variable',
'local_variable',
'model_variable',
'variable',
- 'VariableDeviceChooser']
+ 'VariableDeviceChooser',
+ 'zero_initializer']
+def zero_initializer(ref, use_locking=True, name="zero_initializer"):
+  """Initialize 'ref' with all zeros; the ref tensor must be uninitialized.
+  If it is already initialized, a ValueError is raised. This op is intended
+  to save memory during initialization.
+  Args:
+    ref: ref of the tensor that needs to be zero initialized.
+    name: optional name for this operation.
+  Returns:
+    The ref that was initialized.
+  Raises:
+    ValueError: If the ref tensor is already initialized.
+ """
+ _variable_ops = load_op_library(resource_loader.get_path_to_datafile(
+ "_variable_ops.so"))
+ assert _variable_ops, "Could not load _variable_ops.so"
+ return gen_variable_ops.zero_initializer(ref, name=name)
+
+# shape function for _ZeroInitializerOp
+@ops.RegisterShape("ZeroInitializer")
+def _ZeroInitializerShape(op):
+ var_shape = op.inputs[0].get_shape()
+ return [var_shape]
+
def assert_global_step(global_step_tensor):
"""Asserts `global_step_tensor` is a scalar int `Variable` or `Tensor`.
diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py
index d6e1d03a56..eb0a2c2d8e 100644
--- a/tensorflow/contrib/framework/python/ops/variables_test.py
+++ b/tensorflow/contrib/framework/python/ops/variables_test.py
@@ -1053,5 +1053,28 @@ class AssignFromCheckpointFnTest(tf.test.TestCase):
self.assertEqual(init_value0, var0.eval())
self.assertEqual(init_value1, var1.eval())
+class ZeroInitializerOpTest(tf.test.TestCase):
+
+ def _testZeroInitializer(self, shape, initializer, use_init):
+ var = tf.Variable(initializer)
+ var_zero = tf.contrib.framework.zero_initializer(var)
+ with self.test_session() as sess:
+ with self.assertRaisesOpError("Attempting to use uninitialized value"):
+ var.eval()
+ if use_init:
+ sess.run(var.initializer)
+ with self.assertRaisesOpError("input is already initialized"):
+ var_zero.eval()
+ self.assertAllClose(np.ones(shape), var.eval())
+ else:
+ var_zero.eval()
+ self.assertAllClose(np.zeros(shape), var.eval())
+
+ def testZeroInitializer(self):
+ for dtype in (tf.int32, tf.int64, tf.float32, tf.float64):
+ for use_init in (False, True):
+ self._testZeroInitializer(
+ [10, 20], tf.ones([10, 20], dtype = dtype), use_init)
+
if __name__ == '__main__':
tf.test.main()
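For illustration, a minimal usage sketch of the `zero_initializer` wrapper added above (variable name and shape are hypothetical; the `ZeroInitializerOpTest` above exercises the same path):

```python
import tensorflow as tf

# Build a variable but skip its regular initializer; zero_initializer
# allocates the backing memory and fills it with zeros in a single op, so
# the (possibly large) initial value never has to be materialized.
var = tf.Variable(tf.ones([10, 20]), name="weights")
zero_init = tf.contrib.framework.zero_initializer(var)

with tf.Session() as sess:
    sess.run(zero_init)           # var is now allocated and all-zero
    print(sess.run(var).sum())    # 0.0
```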
diff --git a/tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj/project.pbxproj b/tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj/project.pbxproj
index a726698747..5cd173b416 100644
--- a/tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj/project.pbxproj
+++ b/tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj/project.pbxproj
@@ -281,7 +281,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
- "$(SRCROOT)/../../makefile/downloads/eigen-latest",
+ "$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/Benchmark-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
@@ -315,7 +315,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
- "$(SRCROOT)/../../makefile/downloads/eigen-latest",
+ "$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/Benchmark-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
diff --git a/tensorflow/contrib/ios_examples/camera/Info.plist b/tensorflow/contrib/ios_examples/camera/Info.plist
index d374f914b7..0cd75f61f7 100644
--- a/tensorflow/contrib/ios_examples/camera/Info.plist
+++ b/tensorflow/contrib/ios_examples/camera/Info.plist
@@ -36,5 +36,7 @@
<array>
<string>UIInterfaceOrientationPortrait</string>
</array>
+ <key>NSCameraUsageDescription</key>
+	<string>Capture images to detect objects</string>
</dict>
</plist>
diff --git a/tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj/project.pbxproj b/tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj/project.pbxproj
index c123df9586..e122fc3012 100644
--- a/tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj/project.pbxproj
+++ b/tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj/project.pbxproj
@@ -289,7 +289,7 @@
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/proto",
- "$(SRCROOT)/../../makefile/downloads/eigen-latest",
+ "$(SRCROOT)/../../makefile/downloads/eigen",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../../..",
@@ -348,7 +348,7 @@
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/proto",
- "$(SRCROOT)/../../makefile/downloads/eigen-latest",
+ "$(SRCROOT)/../../makefile/downloads/eigen",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../../..",
diff --git a/tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj/project.pbxproj b/tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj/project.pbxproj
index ae9f49dacd..94a0037e4f 100644
--- a/tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj/project.pbxproj
+++ b/tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj/project.pbxproj
@@ -284,7 +284,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
- "$(SRCROOT)/../../makefile/downloads/eigen-latest",
+ "$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
@@ -323,7 +323,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
- "$(SRCROOT)/../../makefile/downloads/eigen-latest",
+ "$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index dc4ee9226a..aee57dbeaf 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -117,6 +117,7 @@ def batch_norm(inputs,
scale=False,
epsilon=0.001,
activation_fn=None,
+ initializers={},
updates_collections=ops.GraphKeys.UPDATE_OPS,
is_training=True,
reuse=None,
@@ -211,39 +212,43 @@ def batch_norm(inputs,
if center:
beta_collections = utils.get_variable_collections(variables_collections,
'beta')
+ beta_initializer = initializers.get('beta', init_ops.zeros_initializer)
beta = variables.model_variable('beta',
shape=params_shape,
dtype=dtype,
- initializer=init_ops.zeros_initializer,
+ initializer=beta_initializer,
collections=beta_collections,
trainable=trainable)
if scale:
gamma_collections = utils.get_variable_collections(variables_collections,
'gamma')
+ gamma_initializer = initializers.get('gamma', init_ops.ones_initializer)
gamma = variables.model_variable('gamma',
shape=params_shape,
dtype=dtype,
- initializer=init_ops.ones_initializer,
+ initializer=gamma_initializer,
collections=gamma_collections,
trainable=trainable)
# Create moving_mean and moving_variance variables and add them to the
# appropriate collections.
moving_mean_collections = utils.get_variable_collections(
variables_collections, 'moving_mean')
+ moving_mean_initializer = initializers.get('moving_mean', init_ops.zeros_initializer)
moving_mean = variables.model_variable(
'moving_mean',
shape=params_shape,
dtype=dtype,
- initializer=init_ops.zeros_initializer,
+ initializer=moving_mean_initializer,
trainable=False,
collections=moving_mean_collections)
moving_variance_collections = utils.get_variable_collections(
variables_collections, 'moving_variance')
+ moving_variance_initializer = initializers.get('moving_variance', init_ops.ones_initializer)
moving_variance = variables.model_variable(
'moving_variance',
shape=params_shape,
dtype=dtype,
- initializer=init_ops.ones_initializer,
+ initializer=moving_variance_initializer,
trainable=False,
collections=moving_variance_collections)
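For illustration, a minimal sketch of the new `initializers` argument to `batch_norm` (the constant values are hypothetical; the `testCustomInitializer` case added below exercises the same path):

```python
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 3, 3, 3])
# Keys omitted from the dict fall back to the previous defaults
# (zeros for beta/moving_mean, ones for gamma/moving_variance).
out = tf.contrib.layers.batch_norm(
    images,
    is_training=False,
    scale=True,
    initializers={
        'beta': tf.constant_initializer(5.0),
        'gamma': tf.constant_initializer(2.0),
        'moving_mean': tf.constant_initializer(5.0),
        'moving_variance': tf.constant_initializer(4.0),
    })
```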
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index c67702fe98..b40a8936c7 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1639,6 +1639,29 @@ class BatchNormTest(tf.test.TestCase):
self.assertAllClose(moving_mean.eval(), expected_mean)
self.assertAllClose(moving_variance.eval(), expected_var)
+ def testCustomInitializer(self):
+ height, width = 3, 3
+ channels = 3
+ with self.test_session() as sess:
+ images = np.ones((5, height, width, channels))*9.0
+ beta = tf.constant_initializer(np.ones(channels)*5.0)
+ gamma = tf.constant_initializer(np.ones(channels)*2.0)
+ mean = tf.constant_initializer(np.ones(channels)*5.0)
+ variance = tf.constant_initializer(np.ones(channels)*4.0)
+ output = tf.contrib.layers.batch_norm(images,
+ is_training=False,
+ scale=True,
+ epsilon=0.0,
+ initializers={
+ 'beta': beta,
+ 'gamma': gamma,
+ 'moving_mean': mean,
+ 'moving_variance': variance,
+ })
+ sess.run(tf.initialize_all_variables())
+ outs = sess.run(output)
+ self.assertAllClose(outs, images)
+
class LayerNormTest(tf.test.TestCase):
diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index e38b93790b..b6e3af5451 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -61,7 +61,8 @@ def optimize_loss(loss,
update_ops=None,
variables=None,
name=None,
- summaries=None):
+ summaries=None,
+ colocate_gradients_with_ops=False):
"""Given loss and parameters for optimizer, returns a training op.
Various ways of passing optimizers, include:
@@ -112,6 +113,8 @@ def optimize_loss(loss,
summaries: List of internal quantities to visualize on tensorboard. If not
set only the loss and the learning rate will be reported. The
complete list is in OPTIMIZER_SUMMARIES.
+ colocate_gradients_with_ops: If True, try colocating gradients with the
+ corresponding op.
Returns:
Training op.
@@ -185,7 +188,8 @@ def optimize_loss(loss,
variables = vars_.trainable_variables()
# Compute gradients.
- gradients = opt.compute_gradients(loss, variables)
+ gradients = opt.compute_gradients(loss, variables,
+ colocate_gradients_with_ops=colocate_gradients_with_ops)
# Optionally add gradient noise.
if gradient_noise_scale is not None:
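
[Editor's note] A sketch of the new flag in use; the tiny model is a stand-in, and the call assumes `'SGD'` is among the optimizer names `optimize_loss` accepts:

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10])
w = tf.get_variable('w', [10, 1])
loss = tf.nn.l2_loss(tf.matmul(x, w))
global_step = tf.Variable(0, trainable=False, name='global_step')

train_op = tf.contrib.layers.optimize_loss(
    loss, global_step, learning_rate=0.1, optimizer='SGD',
    # Ask compute_gradients to place each gradient op on the same
    # device as the forward op it differentiates, which can cut
    # cross-device traffic in multi-GPU graphs.
    colocate_gradients_with_ops=True)
```
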
diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index c72b23f3ac..2c972421af 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -408,6 +408,7 @@ py_test(
size = "small",
srcs = ["python/learn/estimators/classifier_test.py"],
srcs_version = "PY2AND3",
+ tags = ["manual"], # http://b/31032996
deps = [
":learn",
"//tensorflow:tensorflow_py",
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index a5d3cb49ff..361aca5b95 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -426,6 +426,8 @@ $(wildcard tensorflow/core/platform/*/*/*.cc) \
$(wildcard tensorflow/core/util/*.cc) \
$(wildcard tensorflow/core/util/*/*.cc) \
tensorflow/core/util/version_info.cc
+# Remove duplicates (for version_info.cc)
+CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS))
CORE_CC_EXCLUDE_SRCS := \
$(wildcard tensorflow/core/*/*test.cc) \
$(wildcard tensorflow/core/*/*testutil*) \
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index fd68760be8..b5fa454dcd 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -141,6 +141,12 @@ xcode-select --install
If this is a new install, you will need to run XCode once to agree to the
license before continuing.
+Then install [automake](https://en.wikipedia.org/wiki/Automake):
+
+```bash
+brew install automake
+```
+
Also, download the graph if you haven't already:
```bash
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index 3fc841edd0..e6622a26e9 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -19,7 +19,12 @@ set -e
DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads
BZL_FILE_PATH=tensorflow/workspace.bzl
-EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/.*tar\.gz' "${BZL_FILE_PATH}")"
+EIGEN_VERSION="$(sed -ne 's/^[ \t]*eigen_version = "\(.*\)".*$/\1/p' "${BZL_FILE_PATH}")"
+if [ "${EIGEN_VERSION}" == '' ]; then
+ echo "Cannot extract eigen_version from ${BZL_FILE_PATH}" >&2
+ exit 1
+fi
+EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/' "${BZL_FILE_PATH}")${EIGEN_VERSION}.tar.gz"
GEMMLOWP_URL="$(grep -o 'http.*github.com/google/gemmlowp/.*tar\.gz' "${BZL_FILE_PATH}")"
GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
PROTOBUF_URL="$(grep -o 'http.*github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}")"
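
[Editor's note] The change above couples this script to the `eigen_version = "..."` assignment in `tensorflow/workspace.bzl` (added later in this commit). For clarity, an equivalent of the sed extraction written in Python; this is illustrative only and not part of the build:

```python
import re

# Mirror of the sed expression: find `eigen_version = "..."` in
# workspace.bzl and capture the quoted value.
with open('tensorflow/workspace.bzl') as f:
    match = re.search(r'^[ \t]*eigen_version = "(.*)"', f.read(), re.MULTILINE)
if match is None:
    raise RuntimeError('Cannot extract eigen_version from tensorflow/workspace.bzl')
eigen_version = match.group(1)  # e.g. "c78d757b69d3"
```
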
diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index ed9d53fc4c..f97ee40f32 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -69,7 +69,7 @@ and
models.
* [preprocess](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/preprocess.py):
is a module with various preprocessing utilities.
-* [queues](https://www.tensorflow.org/code/tensorflow/contrib/slim/queues.py):
+* [queues](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/queues.py):
provides a context manager for easily and safely starting and closing
QueueRunners.
* [regularizers](https://www.tensorflow.org/code/tensorflow/contrib/layers/python/layers/regularizers.py):
@@ -503,7 +503,7 @@ pose_loss = MyCustomLossFunction(pose_predictions, pose_labels)
slim.losses.add_loss(pose_loss) # Letting TF-Slim know about the additional loss.
# The following two ways to compute the total loss are equivalent:
-regularization_loss = tf.add_n(slim.get_regularization_losses())
+regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
total_loss1 = classification_loss + sum_of_squares_loss + pose_loss + regularization_loss
# (Regularization Loss is included in the total loss by default).
diff --git a/tensorflow/core/common_runtime/simple_placer.h b/tensorflow/core/common_runtime/simple_placer.h
index 77600c9c99..59be859449 100644
--- a/tensorflow/core/common_runtime/simple_placer.h
+++ b/tensorflow/core/common_runtime/simple_placer.h
@@ -33,8 +33,8 @@ namespace tensorflow {
// devices the given DeviceSet, respecting the following constraints:
//
// 1. Existing device assignments remain unchanged.
-// 2. Requested (partial or complete) device specifications in the
-// are granted.
+// 2. Requested (partial or complete) device specifications given by device name
+// for each node are granted.
// 3. Nodes connected by edges of a reference type are colocated on
// the same device.
// 4. Given nodes "A" and "B", if node "B" has a colocation group
diff --git a/tensorflow/core/kernels/conv_ops_fused.cc b/tensorflow/core/kernels/conv_ops_fused.cc
index a041c6e9f8..697ee5d25a 100644
--- a/tensorflow/core/kernels/conv_ops_fused.cc
+++ b/tensorflow/core/kernels/conv_ops_fused.cc
@@ -46,9 +46,16 @@ namespace {
// In this case, we've picked 16 megabytes as a reasonable limit.
const size_t kMaxChunkSize = (16 * 1024 * 1024);
+// Lookup method used when resizing.
+enum SamplingMode {
+ BILINEAR = 0,
+ NEAREST = 1,
+};
+
// Combines bilinear resizing and mirror padding into the im2col transformation
-// stage of convolution,
-template <class T1, class T2, class T3, class TGemmFunctor>
+// stage of convolution.
+template <class T1, class T2, class T3, class TGemmFunctor,
+ SamplingMode SampleMode>
class FusedResizeAndPadConvFunctor {
public:
void operator()(OpKernelContext* context, const Tensor& input,
@@ -78,6 +85,9 @@ class FusedResizeAndPadConvFunctor {
<< output_width << ", " << output_height;
return;
}
+ OP_REQUIRES(
+ context, ((SampleMode == NEAREST) || (SampleMode == BILINEAR)),
+      errors::InvalidArgument("Bad sample mode passed in: ", SampleMode));
// These calculations define how the patches will be positioned within the
// input image. The actual definitions are quite complex, and rely on the
@@ -183,18 +193,24 @@ class FusedResizeAndPadConvFunctor {
T1 in_value;
if ((conv_in_x >= 0) && (conv_in_x < padded_width) &&
(conv_in_y >= 0) && (conv_in_y < padded_height)) {
- const T1 top_left(
- input_data(batch, top_y_index, left_x_index, in_channel));
- const T1 top_right(input_data(batch, top_y_index,
- right_x_index, in_channel));
- const T1 bottom_left(input_data(batch, bottom_y_index,
- left_x_index, in_channel));
- const T1 bottom_right(input_data(batch, bottom_y_index,
- right_x_index, in_channel));
- const T1 top = top_left + (top_right - top_left) * x_lerp;
- const T1 bottom =
- bottom_left + (bottom_right - bottom_left) * x_lerp;
- in_value = top + (bottom - top) * y_lerp;
+ if (SampleMode == NEAREST) {
+ const T1 top_left(input_data(batch, top_y_index,
+ left_x_index, in_channel));
+ in_value = top_left;
+ } else if (SampleMode == BILINEAR) {
+ const T1 top_left(input_data(batch, top_y_index,
+ left_x_index, in_channel));
+ const T1 top_right(input_data(batch, top_y_index,
+ right_x_index, in_channel));
+ const T1 bottom_left(input_data(batch, bottom_y_index,
+ left_x_index, in_channel));
+ const T1 bottom_right(input_data(
+ batch, bottom_y_index, right_x_index, in_channel));
+ const T1 top = top_left + (top_right - top_left) * x_lerp;
+ const T1 bottom =
+ bottom_left + (bottom_right - bottom_left) * x_lerp;
+ in_value = top + (bottom - top) * y_lerp;
+ }
} else {
in_value = T1(0);
}
@@ -208,8 +224,8 @@ class FusedResizeAndPadConvFunctor {
((batch == (input_batches - 1)) &&
(out_y == (output_height - 1)) && (out_x == (output_width - 1)));
if (is_last_in_chunk || is_last_overall) {
- // Now we've assembled a set of image patches into a matrix, apply a
- // GEMM matrix multiply of the patches as rows, times the filter
+ // Now we've assembled a set of image patches into a matrix, apply
+ // a GEMM matrix multiply of the patches as rows, times the filter
// weights in columns, to get partial results in the output matrix.
const int how_many_patches = patch_index_within_chunk + 1;
const int m = how_many_patches;
@@ -236,13 +252,15 @@ class FusedResizeAndPadConvFunctor {
// Implements a version of convolution with bilinear resizing and mirror padding
// included.
-template <class T, class TConvFunctor>
+template <class T, class TConvFunctor, bool DoResize>
class FusedResizeConv2DUsingGemmOp : public OpKernel {
public:
explicit FusedResizeConv2DUsingGemmOp(OpKernelConstruction* context)
: OpKernel(context) {
- OP_REQUIRES_OK(context,
- context->GetAttr("resize_align_corners", &align_corners_));
+ if (DoResize) {
+ OP_REQUIRES_OK(context,
+ context->GetAttr("resize_align_corners", &align_corners_));
+ }
MirrorPadMode mode;
OP_REQUIRES_OK(context, context->GetAttr("mode", &mode));
@@ -280,13 +298,34 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel {
OP_REQUIRES(context, (input.shape().num_elements() > 0),
errors::InvalidArgument("Input tensor can't be empty"));
- ImageResizerState st(align_corners_);
- st.ValidateAndCalculateOutputSize(context, input);
- if (!context->status().ok()) return;
- const TensorShape resized_shape(
+ ImageResizerState st(false);
+ if (DoResize) {
+ st = ImageResizerState(align_corners_);
+ st.ValidateAndCalculateOutputSize(context, input);
+ if (!context->status().ok()) return;
+ } else {
+ // Set up the resize parameters to do no scaling at all.
+ st.batch_size = input.dim_size(0);
+ st.out_height = input.dim_size(1);
+ st.out_width = input.dim_size(2);
+ st.in_height = input.dim_size(1);
+ st.in_width = input.dim_size(2);
+ st.channels = input.dim_size(3);
+ st.height_scale = 1.0f;
+ st.width_scale = 1.0f;
+ }
+ TensorShape resized_shape(
{input.dim_size(0), st.out_height, st.out_width, input.dim_size(3)});
-
- const Tensor& paddings = context->input(2);
+ int paddings_index;
+ int filter_index;
+ if (DoResize) {
+ paddings_index = 2;
+ filter_index = 3;
+ } else {
+ paddings_index = 1;
+ filter_index = 2;
+ }
+ const Tensor& paddings = context->input(paddings_index);
const int dims = resized_shape.dims();
OP_REQUIRES(
@@ -365,7 +404,7 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel {
// Input filter is of the following dimensions:
// [ filter_rows, filter_cols, in_depth, out_depth]
- const Tensor& filter = context->input(3);
+ const Tensor& filter = context->input(filter_index);
// For 2D convolution, there should be 4 dimensions.
OP_REQUIRES(context, padded_shape.dims() == 4,
@@ -473,15 +512,26 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel {
TF_DISALLOW_COPY_AND_ASSIGN(FusedResizeConv2DUsingGemmOp);
};
-#define REGISTER_FUSED(T) \
- REGISTER_KERNEL_BUILDER( \
- Name("FusedResizeAndPadConv2D") \
- .Device(DEVICE_CPU) \
- .TypeConstraint<T>("T"), \
- FusedResizeConv2DUsingGemmOp< \
- T, \
- FusedResizeAndPadConvFunctor<T, T, T, FastGemmFunctor<T, T, T>>>);
+#define REGISTER_FUSED(T) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("FusedResizeAndPadConv2D") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<T>("T"), \
+ FusedResizeConv2DUsingGemmOp< \
+ T, FusedResizeAndPadConvFunctor<T, T, T, FastGemmFunctor<T, T, T>, \
+ BILINEAR>, \
+ true>);
TF_CALL_float(REGISTER_FUSED);
+#define REGISTER_PAD_ONLY_FUSED(T) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("FusedPadConv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+ FusedResizeConv2DUsingGemmOp< \
+ T, FusedResizeAndPadConvFunctor<T, T, T, FastGemmFunctor<T, T, T>, \
+ NEAREST>, \
+ false>);
+
+TF_CALL_float(REGISTER_PAD_ONLY_FUSED);
+
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc
index 228f2d5def..f955e6a6b6 100644
--- a/tensorflow/core/kernels/conv_ops_test.cc
+++ b/tensorflow/core/kernels/conv_ops_test.cc
@@ -121,22 +121,15 @@ class FusedResizePadConvOpTest : public OpsTestBase {
auto root = tensorflow::Scope::NewRootScope();
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
- const size_t input_data_size = input_height * input_width * input_depth;
Tensor input_data(DT_FLOAT,
TensorShape({1, input_height, input_width, input_depth}));
- for (int i = 0; i < input_data_size; ++i) {
- input_data.flat<float>()(i) = i + 1.0f;
- }
+ test::FillIota<float>(&input_data, 1.0f);
Output input =
Const(root.WithOpName("input"), Input::Initializer(input_data));
- const size_t filter_data_size =
- filter_size * filter_size * filter_count * input_depth;
Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
input_depth, filter_count}));
- for (int i = 0; i < filter_data_size; ++i) {
- filter_data.flat<float>()(i) = i + 1.0f;
- }
+ test::FillIota<float>(&filter_data, 1.0f);
Output filter =
Const(root.WithOpName("filter"), Input::Initializer(filter_data));
@@ -173,6 +166,54 @@ class FusedResizePadConvOpTest : public OpsTestBase {
test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5);
}
+
+ void CompareFusedPadOnlyAndSeparate(int input_width, int input_height,
+ int input_depth, int y_padding,
+ int x_padding, int filter_size,
+ int filter_count, string pad_mode,
+ int stride, string padding) {
+ auto root = tensorflow::Scope::NewRootScope();
+ using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
+
+ Tensor input_data(DT_FLOAT,
+ TensorShape({1, input_height, input_width, input_depth}));
+ test::FillIota<float>(&input_data, 1.0f);
+ Output input =
+ Const(root.WithOpName("input"), Input::Initializer(input_data));
+
+ Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
+ input_depth, filter_count}));
+ test::FillIota<float>(&filter_data, 1.0f);
+ Output filter =
+ Const(root.WithOpName("filter"), Input::Initializer(filter_data));
+
+ Output paddings =
+ Const(root.WithOpName("paddings"),
+ {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
+ Output mirror_pad =
+ MirrorPad(root.WithOpName("mirror_pad"), input, paddings, pad_mode);
+ Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, filter,
+ {1, stride, stride, 1}, padding);
+
+ Output fused_conv =
+ FusedPadConv2D(root.WithOpName("fused_conv"), input, paddings, filter,
+ pad_mode, {1, stride, stride, 1}, padding);
+
+ tensorflow::GraphDef graph;
+ TF_ASSERT_OK(root.ToGraphDef(&graph));
+
+ std::unique_ptr<tensorflow::Session> session(
+ tensorflow::NewSession(tensorflow::SessionOptions()));
+ TF_ASSERT_OK(session->Create(graph));
+
+ std::vector<Tensor> unfused_tensors;
+ TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));
+
+ std::vector<Tensor> fused_tensors;
+ TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));
+
+ test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5);
+ }
};
TEST_F(FusedResizePadConvOpTest, HandwrittenConv) { HandwrittenConv(); }
@@ -237,4 +278,24 @@ TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparative) {
"SAME");
}
+TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparative) {
+ CompareFusedPadOnlyAndSeparate(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1, "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, NoResizeConvOnlyComparative) {
+ CompareFusedPadOnlyAndSeparate(10, 10, 3, 0, 0, 4, 4, "REFLECT", 1, "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyComparative) {
+ CompareFusedPadOnlyAndSeparate(4, 4, 1, 2, 2, 1, 1, "REFLECT", 1, "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyWithChannelsComparative) {
+ CompareFusedPadOnlyAndSeparate(4, 4, 3, 2, 2, 1, 1, "REFLECT", 1, "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, NoResizePadOnlySymmetricComparative) {
+ CompareFusedPadOnlyAndSeparate(4, 4, 1, 2, 2, 1, 1, "SYMMETRIC", 1, "SAME");
+}
+
} // namespace tensorflow
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 0b5d159667..e06e14966b 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -654,6 +654,40 @@ strides: 1-D of length 4. The stride of the sliding window for each dimension
padding: The type of padding algorithm to use.
)doc");
+REGISTER_OP("FusedPadConv2D")
+ .Input("input: T")
+ .Input("paddings: int32")
+ .Input("filter: T")
+ .Output("output: T")
+ .Attr("T: {half, float, double}")
+ .Attr(GetMirrorPadModeAttrString())
+ .Attr("strides: list(int)")
+ .Attr(GetPaddingAttrString())
+ .Doc(R"doc(
+Performs padding as a preprocessing step during convolution.
+
+Similar to FusedResizeAndPadConv2D, this op allows for an optimized
+implementation where the spatial padding transformation stage is fused with the
+im2col lookup, but in this case without the bilinear filtering required for
+resizing. Fusing the padding avoids writing out the intermediate results as
+whole tensors, reducing memory pressure, and merging the transformation
+calculations yields some latency gains.
+The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
+order is used instead.
+Internally this op uses a single per-graph scratch buffer, which means that it
+will block if multiple versions are being run in parallel. This is because this
+operator is primarily an optimization to minimize memory usage.
+
+input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+paddings: A two-column matrix specifying the padding sizes. The number of
+ rows must be the same as the rank of `input`.
+filter: 4-D with shape
+ `[filter_height, filter_width, in_channels, out_channels]`.
+strides: 1-D of length 4. The stride of the sliding window for each dimension
+ of `input`. Must be in the same order as the dimension specified with format.
+padding: The type of padding algorithm to use.
+ )doc");
+
// --------------------------------------------------------------------------
REGISTER_OP("DepthwiseConv2dNative")
diff --git a/tensorflow/examples/tutorials/deepdream/deepdream.ipynb b/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
index bb6d70d5c6..cbcc54ce3c 100644
--- a/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
+++ b/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
@@ -623,7 +623,7 @@
"<a id=\"laplacian\"></a>\n",
"## Laplacian Pyramid Gradient Normalization\n",
"\n",
- "This looks better, but the resulting images mostly contain high frequencies. Can we improve it? One way is to add a smoothness prior into the optimization objective. This will effectively blur the image a little every iteration, suppressing the higher frequencies, so that the lower frequencies can catch up. This will require more iterations to produce a nice image. Why don't we just boost lower frequencies of the gradient instead? One way to achieve this is through the [Laplacian pyramid](https://en.wikipedia.org/wiki/Pyramid_%28image_processing%29#Laplacian_pyramid) decomposition. We call the resulting technique _Laplacian Pyramid Gradient Normailzation_."
+ "This looks better, but the resulting images mostly contain high frequencies. Can we improve it? One way is to add a smoothness prior into the optimization objective. This will effectively blur the image a little every iteration, suppressing the higher frequencies, so that the lower frequencies can catch up. This will require more iterations to produce a nice image. Why don't we just boost lower frequencies of the gradient instead? One way to achieve this is through the [Laplacian pyramid](https://en.wikipedia.org/wiki/Pyramid_%28image_processing%29#Laplacian_pyramid) decomposition. We call the resulting technique _Laplacian Pyramid Gradient Normalization_."
]
},
{
diff --git a/tensorflow/examples/tutorials/estimators/abalone.py b/tensorflow/examples/tutorials/estimators/abalone.py
index 978af5c9c3..6d8ce2cbc7 100644
--- a/tensorflow/examples/tutorials/estimators/abalone.py
+++ b/tensorflow/examples/tutorials/estimators/abalone.py
@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
import tempfile
-import urllib
+from six.moves import urllib
import numpy as np
import tensorflow as tf
@@ -51,7 +51,7 @@ def maybe_download():
train_file_name = FLAGS.train_data
else:
train_file = tempfile.NamedTemporaryFile(delete=False)
- urllib.urlretrieve("http://download.tensorflow.org/data/abalone_train.csv", train_file.name) # pylint: disable=line-too-long
+ urllib.request.urlretrieve("http://download.tensorflow.org/data/abalone_train.csv", train_file.name) # pylint: disable=line-too-long
train_file_name = train_file.name
train_file.close()
print("Training data is downloaded to %s" % train_file_name)
@@ -60,7 +60,7 @@ def maybe_download():
test_file_name = FLAGS.test_data
else:
test_file = tempfile.NamedTemporaryFile(delete=False)
- urllib.urlretrieve("http://download.tensorflow.org/data/abalone_test.csv", test_file.name) # pylint: disable=line-too-long
+ urllib.request.urlretrieve("http://download.tensorflow.org/data/abalone_test.csv", test_file.name) # pylint: disable=line-too-long
test_file_name = test_file.name
test_file.close()
print("Test data is downloaded to %s" % test_file_name)
@@ -69,7 +69,7 @@ def maybe_download():
predict_file_name = FLAGS.predict_data
else:
predict_file = tempfile.NamedTemporaryFile(delete=False)
- urllib.urlretrieve("http://download.tensorflow.org/data/abalone_predict.csv", predict_file.name) # pylint: disable=line-too-long
+ urllib.request.urlretrieve("http://download.tensorflow.org/data/abalone_predict.csv", predict_file.name) # pylint: disable=line-too-long
predict_file_name = predict_file.name
predict_file.close()
print("Prediction data is downloaded to %s" % predict_file_name)
diff --git a/tensorflow/examples/tutorials/input_fn/boston.py b/tensorflow/examples/tutorials/input_fn/boston.py
index 9d18bfa9e0..13914ea1c4 100644
--- a/tensorflow/examples/tutorials/input_fn/boston.py
+++ b/tensorflow/examples/tutorials/input_fn/boston.py
@@ -64,7 +64,7 @@ def main(unused_argv):
# Print out predictions
y = regressor.predict(input_fn=lambda: input_fn(prediction_set))
- print ("Predictions: {}".format(str(y)))
+ print("Predictions: {}".format(str(y)))
if __name__ == "__main__":
tf.app.run()
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index 628c6e2741..c717693a56 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -31,6 +31,7 @@ import tensorflow as tf
# Step 1: Download the data.
url = 'http://mattmahoney.net/dc/'
+
def maybe_download(filename, expected_bytes):
"""Download a file if not present, and make sure it's the right size."""
if not os.path.exists(filename):
@@ -60,6 +61,7 @@ print('Data size', len(words))
# Step 2: Build the dictionary and replace rare words with UNK token.
vocabulary_size = 50000
+
def build_dataset(words):
count = [['UNK', -1]]
count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
@@ -94,14 +96,14 @@ def generate_batch(batch_size, num_skips, skip_window):
assert num_skips <= 2 * skip_window
batch = np.ndarray(shape=(batch_size), dtype=np.int32)
labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
- span = 2 * skip_window + 1 # [ skip_window target skip_window ]
+ span = 2 * skip_window + 1 # [ skip_window target skip_window ]
buffer = collections.deque(maxlen=span)
for _ in range(span):
buffer.append(data[data_index])
data_index = (data_index + 1) % len(data)
for i in range(batch_size // num_skips):
target = skip_window # target label at the center of the buffer
- targets_to_avoid = [ skip_window ]
+ targets_to_avoid = [skip_window]
for j in range(num_skips):
while target in targets_to_avoid:
target = random.randint(0, span - 1)
@@ -115,7 +117,7 @@ def generate_batch(batch_size, num_skips, skip_window):
batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)
for i in range(8):
print(batch[i], reverse_dictionary[batch[i]],
- '->', labels[i, 0], reverse_dictionary[labels[i, 0]])
+ '->', labels[i, 0], reverse_dictionary[labels[i, 0]])
# Step 4: Build and train a skip-gram model.
@@ -187,7 +189,7 @@ with tf.Session(graph=graph) as session:
for step in xrange(num_steps):
batch_inputs, batch_labels = generate_batch(
batch_size, num_skips, skip_window)
- feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}
+ feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}
# We perform one update step by evaluating the optimizer op (including it
# in the list of returned values for session.run()
@@ -206,8 +208,8 @@ with tf.Session(graph=graph) as session:
sim = similarity.eval()
for i in xrange(valid_size):
valid_word = reverse_dictionary[valid_examples[i]]
- top_k = 8 # number of nearest neighbors
- nearest = (-sim[i, :]).argsort()[1:top_k+1]
+ top_k = 8 # number of nearest neighbors
+ nearest = (-sim[i, :]).argsort()[1:top_k + 1]
log_str = "Nearest to %s:" % valid_word
for k in xrange(top_k):
close_word = reverse_dictionary[nearest[k]]
@@ -217,11 +219,12 @@ with tf.Session(graph=graph) as session:
# Step 6: Visualize the embeddings.
+
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
- plt.figure(figsize=(18, 18)) #in inches
+ plt.figure(figsize=(18, 18)) # in inches
for i, label in enumerate(labels):
- x, y = low_dim_embs[i,:]
+ x, y = low_dim_embs[i, :]
plt.scatter(x, y)
plt.annotate(label,
xy=(x, y),
@@ -238,7 +241,7 @@ try:
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
plot_only = 500
- low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])
+ low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
labels = [reverse_dictionary[i] for i in xrange(plot_only)]
plot_with_labels(low_dim_embs, labels)
diff --git a/tensorflow/g3doc/api_docs/python/client.md b/tensorflow/g3doc/api_docs/python/client.md
index 2d4c52a004..bd3252dfd5 100644
--- a/tensorflow/g3doc/api_docs/python/client.md
+++ b/tensorflow/g3doc/api_docs/python/client.md
@@ -134,7 +134,7 @@ Example:
# v is the numpy array [10, 20]
# 'fetches' can be a list.
v = session.run([a, b])
- # v a Python list with 2 numpy arrays: the numpy array [10, 20] and the
+ # v is a Python list with 2 numpy arrays: the numpy array [10, 20] and the
# 1-D array [1.0, 2.0]
# 'fetches' can be arbitrary lists, tuples, namedtuple, dicts:
MyData = collections.namedtuple('MyData', ['a', 'b'])
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md
index 5b2a0bb95b..e60a31ea5c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md
@@ -11,9 +11,9 @@ deconvolution.
* <b>`value`</b>: A 4-D `Tensor` of type `float` and shape
- `[batch, height, width, in_channels]`.
+ `[batch, in_height, in_width, in_channels]`.
* <b>`filter`</b>: A 4-D `Tensor` with the same type as `value` and shape
- `[height, width, output_channels, in_channels]`. `filter`'s
+ `[filter_height, filter_width, output_channels, in_channels]`. `filter`'s
`in_channels` dimension must match that of `value`.
* <b>`output_shape`</b>: A 1-D `Tensor` representing the output shape of the
deconvolution op.
diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md
index 14f27ed4dd..e2c23f3024 100644
--- a/tensorflow/g3doc/get_started/os_setup.md
+++ b/tensorflow/g3doc/get_started/os_setup.md
@@ -859,7 +859,7 @@ and you left the Cuda or cuDNN version empty, try specifying them explicitly.
### Protobuf library related issues
TensorFlow pip package depends on protobuf pip package version
-3.0.0b2. Protobuf's pip package downloaded from [PyPI](https://pypi.python.org)
+3.1.0. Protobuf's pip package downloaded from [PyPI](https://pypi.python.org)
(when running `pip install protobuf`) is a Python only library, that has
Python implementations of proto serialization/deserialization which can be
10x-50x slower than the C++ implementation. Protobuf also supports a binary
@@ -877,14 +877,33 @@ $ pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/prot
$ pip install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp27-cp27m-macosx_10_11_x86_64.whl
```
-And for Python 3:
+And for Python 3.5:
```bash
# Ubuntu/Linux 64-bit:
-$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0-cp3-none-linux_x86_64.whl
+$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X:
-$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp3-cp3m-macosx_10_11_x86_64.whl
+$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp35-cp35m-macosx_10_11_x86_64.whl
+```
+
+If your system/configuration is not listed above, you can use the following
+instructions to build your own protobuf wheel file.
+First install its prerequisites, as described
+[here](https://github.com/google/protobuf/blob/master/src/README.md).
+
+Then:
+```bash
+$ git clone https://github.com/google/protobuf.git
+$ cd protobuf
+$ ./autogen.sh
+$ CXXFLAGS="-fPIC -g -O2" ./configure
+$ make -j12
+$ export PROTOC=$PWD/src/protoc
+$ cd python
+$ python setup.py bdist_wheel --cpp_implementation --compile_static_extension
+$ pip uninstall protobuf
+$ pip install dist/<wheel file name>
```
Install the above package _after_ you have installed TensorFlow via pip, as the
diff --git a/tensorflow/g3doc/tutorials/image_recognition/index.md b/tensorflow/g3doc/tutorials/image_recognition/index.md
index 3a7e50bd81..62b802c022 100644
--- a/tensorflow/g3doc/tutorials/image_recognition/index.md
+++ b/tensorflow/g3doc/tutorials/image_recognition/index.md
@@ -262,7 +262,7 @@ output data.
This gives us a vector of `Tensor` objects, which in this case we know will only be a
single object long. You can think of a `Tensor` as a multi-dimensional array in this
-context, and it holds a 299 pixel high, 299 pixel width, 3 channel image as float
+context, and it holds a 299 pixel high, 299 pixel wide, 3 channel image as float
values. If you have your own image-processing framework in your product already, you
should be able to use that instead, as long as you apply the same transformations
before you feed images into the main graph.
diff --git a/tensorflow/g3doc/tutorials/word2vec/index.md b/tensorflow/g3doc/tutorials/word2vec/index.md
index 34f3d12f89..a9cc51207d 100644
--- a/tensorflow/g3doc/tutorials/word2vec/index.md
+++ b/tensorflow/g3doc/tutorials/word2vec/index.md
@@ -227,7 +227,7 @@ When we inspect these visualizations it becomes apparent that the vectors
capture some general, and in fact quite useful, semantic information about
words and their relationships to one another. It was very interesting when we
first discovered that certain directions in the induced vector space specialize
-towards certain semantic relationships, e.g. *male-female*, *gender* and
+towards certain semantic relationships, e.g. *male-female*, *verb tense* and
even *country-capital* relationships between words, as illustrated in the figure
below (see also for example
[Mikolov et al., 2013](http://www.aclweb.org/anthology/N13-1090)).
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 899a3169c7..7f68eacb1b 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -576,7 +576,7 @@ cuda_py_test(
cuda_py_test(
name = "division_past_test",
- size = "small",
+ size = "medium",
srcs = ["division_past_test.py"],
additional_deps = [
"//tensorflow:tensorflow_py",
diff --git a/tensorflow/python/kernel_tests/bias_op_test.py b/tensorflow/python/kernel_tests/bias_op_test.py
index 87027c8a78..862948610d 100644
--- a/tensorflow/python/kernel_tests/bias_op_test.py
+++ b/tensorflow/python/kernel_tests/bias_op_test.py
@@ -128,7 +128,13 @@ class BiasAddTest(tf.test.TestCase):
input_tensor, np_input.shape, output_tensor, np_input.shape)
bias_jacob_t, bias_jacob_n = tf.test.compute_gradient(
bias_tensor, bias.shape, output_tensor, np_input.shape)
-
+
+ # Test gradient of BiasAddGrad
+ bias_add_grad = tf.gradients(tf.nn.l2_loss(output_tensor),
+ bias_tensor)[0]
+ grad_jacob_t, grad_jacob_n = tf.test.compute_gradient(
+ output_tensor, np_input.shape, bias_add_grad, bias.shape)
+
if dtype == np.float16:
# Compare fp16 theoretical gradients to fp32 numerical gradients,
# since fp16 numerical gradients are too imprecise unless great
@@ -144,12 +150,18 @@ class BiasAddTest(tf.test.TestCase):
input_tensor, np_input.shape, output_tensor, np_input.shape)
_, bias_jacob_n = tf.test.compute_gradient(
bias_tensor, bias.shape, output_tensor, np_input.shape)
-
+
+ bias_add_grad = tf.gradients(tf.nn.l2_loss(output_tensor),
+ bias_tensor)[0]
+ _, grad_jacob_n = tf.test.compute_gradient(
+ output_tensor, np_input.shape, bias_add_grad, bias.shape)
+
threshold = 2e-3
if dtype == tf.float64:
threshold = 1e-10
self.assertAllClose(tensor_jacob_t, tensor_jacob_n, threshold, threshold)
self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold)
+ self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold)
def testGradientTensor(self):
for (data_format, use_gpu) in GetTestConfigs():
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 6b73a627e0..2bed2d5907 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1162,7 +1162,7 @@ def zeros(shape, dtype=dtypes.float32, name=None):
For example:
```python
- tf.zeros([3, 4], int32) ==> [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
+ tf.zeros([3, 4], tf.int32) ==> [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
```
Args:
@@ -1266,7 +1266,7 @@ def ones(shape, dtype=dtypes.float32, name=None):
For example:
```python
- tf.ones([2, 3], int32) ==> [[1, 1, 1], [1, 1, 1]]
+ tf.ones([2, 3], tf.int32) ==> [[1, 1, 1], [1, 1, 1]]
```
Args:
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index a0a4570f17..180a396adc 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -203,6 +203,43 @@ def _BiasAddGrad(op, received_grad):
return (received_grad, gen_nn_ops.bias_add_grad(out_backprop=received_grad,
data_format=data_format))
+@ops.RegisterGradient("BiasAddGrad")
+def _BiasAddGradGrad(op, received_grad):
+ """Gradient for the BiasAddGrad op.
+
+ Args:
+ op: BiasAddGrad op for which we are calculating gradients.
+ received_grad: The gradients passed to the BiasAddGrad op.
+
+ Returns:
+ A single gradient Tensor for the input to BiasAddGrad (which
+ is the gradient of the bias term in BiasAdd)
+ """
+
+ try:
+ data_format = op.get_attr("data_format")
+ except ValueError:
+ data_format = None
+
+ shape = array_ops.shape(op.inputs[0])
+ rank = array_ops.rank(op.inputs[0])
+ bias_shape = array_ops.shape(received_grad)
+
+ if data_format == "NCHW":
+ expanded_shape = array_ops.concat(
+ 0,
+ [array_ops.ones_like(shape[:-3]), bias_shape, array_ops.ones_like(shape[-2:])]
+ )
+
+ tile_mults = array_ops.concat(0, [shape[:-3], [1], shape[-2:]])
+
+ else:
+ expanded_shape = array_ops.concat(0, [array_ops.ones_like(shape[:-1]), bias_shape])
+ tile_mults = array_ops.concat(0, [shape[:-1], [1]])
+
+ expanded_grad = array_ops.reshape(received_grad, expanded_shape)
+ return array_ops.tile(expanded_grad, tile_mults)
+
@ops.RegisterGradient("BiasAddV1")
def _BiasAddGradV1(unused_bias_op, received_grad):
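
[Editor's note] With a gradient registered for BiasAddGrad, differentiating twice through BiasAdd no longer fails. A minimal sketch of what the registration enables, mirroring the path the updated bias_op_test exercises:

```python
import tensorflow as tf

x = tf.ones([2, 3, 3, 4])
bias = tf.ones([4])
y = tf.nn.bias_add(x, bias)
# First derivative w.r.t. the bias goes through BiasAddGrad...
bias_grad = tf.gradients(tf.nn.l2_loss(y), bias)[0]
# ...and differentiating that result is what the new registration allows.
second = tf.gradients(bias_grad, y)[0]
```
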
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 6486463b2c..b70cf83d91 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1429,28 +1429,34 @@ ops.RegisterShape("AvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("MaxPool")(common_shapes.call_cpp_shape_fn)
-@ops.RegisterShape("FusedResizeAndPadConv2D")
-def _FusedResizeAndPadConv2DShape(op):
- """Shape function for FusedResizeAndPadConv2D op."""
+def _CommonFusedConvCalculations(op, has_resize):
+ """Shape function for Fused*Conv2D ops."""
# The bilinear resize shape calculation.
input_shape = op.inputs[0].get_shape().with_rank(4)
- unused_size_shape = op.inputs[1].get_shape().merge_with([2])
- size = tensor_util.constant_value(op.inputs[1])
- if size is not None:
- height = size[0]
- width = size[1]
+ if has_resize:
+ unused_size_shape = op.inputs[1].get_shape().merge_with([2])
+ size = tensor_util.constant_value(op.inputs[1])
+ if size is not None:
+ height = size[0]
+ width = size[1]
+ else:
+ height = None
+ width = None
+ resized_shape = tensor_shape.TensorShape(
+ [input_shape[0], height, width, input_shape[3]])
+ paddings_index = 2
+ filter_index = 3
else:
- height = None
- width = None
- resized_shape = tensor_shape.TensorShape(
- [input_shape[0], height, width, input_shape[3]])
+ resized_shape = input_shape
+ paddings_index = 1
+ filter_index = 2
# Calculates the effect of the padding.
- paddings_shape = op.inputs[2].get_shape().with_rank(2)
+ paddings_shape = op.inputs[paddings_index].get_shape().with_rank(2)
resized_shape = resized_shape.with_rank(paddings_shape[0].value)
paddings_shape = paddings_shape.merge_with(
tensor_shape.matrix(resized_shape.ndims, 2))
- paddings = tensor_util.constant_value(op.inputs[2])
+ paddings = tensor_util.constant_value(op.inputs[paddings_index])
if paddings is None:
padded_shape = tensor_shape.unknown_shape(ndims=resized_shape.ndims)
else:
@@ -1462,7 +1468,7 @@ def _FusedResizeAndPadConv2DShape(op):
padded_shape = tensor_shape.TensorShape(output_dims)
# Finally work out the convolution's effect.
- filter_shape = op.inputs[3].get_shape().with_rank(4)
+ filter_shape = op.inputs[filter_index].get_shape().with_rank(4)
batch_size = padded_shape[0]
in_rows = padded_shape[1]
@@ -1494,6 +1500,18 @@ def _FusedResizeAndPadConv2DShape(op):
return [tensor_shape.TensorShape(output_shape)]
+@ops.RegisterShape("FusedResizeAndPadConv2D")
+def _FusedResizeAndPadConv2DShape(op):
+ """Shape function for FusedResizeAndPadConv2D op."""
+ return _CommonFusedConvCalculations(op, True)
+
+
+@ops.RegisterShape("FusedPadConv2D")
+def _FusedPadConv2DShape(op):
+ """Shape function for FusedResizeAndPadConv2D op."""
+ return _CommonFusedConvCalculations(op, False)
+
+
ops.RegisterShape("MaxPoolWithArgmax")(common_shapes.call_cpp_shape_fn)
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index ee68770606..f003b55396 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -1348,7 +1348,7 @@ def variable_scope(name_or_scope,
a reuse scope, or if reuse is not `None` or `True`.
TypeError: when the types of some arguments are not appropriate.
"""
- if default_name is None and not name_or_scope:
+ if default_name is None and name_or_scope is None:
raise TypeError("If default_name is None then name_or_scope is required")
if values is None:
values = []
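
[Editor's note] The stricter `is None` check makes the error about a missing name rather than a falsy one, so an explicit empty string is now accepted. A sketch (the behavior of the empty scope name itself is an assumption here):

```python
import tensorflow as tf

# Previously raised TypeError because `not ""` is True; now only
# name_or_scope=None with no default_name is rejected.
with tf.variable_scope(""):
    v = tf.get_variable("v", [1])  # assumption: no scope prefix is added
```
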
diff --git a/tensorflow/python/tools/optimize_for_inference_lib.py b/tensorflow/python/tools/optimize_for_inference_lib.py
index 1cb5ba1625..8e040dcef7 100644
--- a/tensorflow/python/tools/optimize_for_inference_lib.py
+++ b/tensorflow/python/tools/optimize_for_inference_lib.py
@@ -48,6 +48,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import collections
import math
import re
import numpy as np
@@ -84,7 +85,8 @@ def optimize_for_inference(input_graph_def, input_node_names,
placeholder_type_enum)
optimized_graph_def = graph_util.remove_training_nodes(optimized_graph_def)
optimized_graph_def = fold_batch_norms(optimized_graph_def)
- optimized_graph_def = fuse_resize_and_conv(optimized_graph_def)
+ optimized_graph_def = fuse_resize_and_conv(optimized_graph_def,
+ output_node_names)
ensure_graph_is_valid(optimized_graph_def)
return optimized_graph_def
@@ -336,7 +338,7 @@ def fold_batch_norms(input_graph_def):
return result_graph_def
-def fuse_resize_and_conv(input_graph_def):
+def fuse_resize_and_conv(input_graph_def, output_node_names):
"""Merges preceding resize and mirror pad ops into a specialized convolution.
There's a common pattern of enlarging the input to a convolution using a
@@ -361,7 +363,14 @@ def fuse_resize_and_conv(input_graph_def):
else:
raise ValueError("Duplicate node names detected for ", node.name)
- nodes_to_skip = {}
+ node_reference_count = collections.defaultdict(int)
+ for node in input_graph_def.node:
+ for input_name in node.input:
+ stripped_name = node_name_from_input(input_name)
+ node_reference_count[stripped_name] += 1
+ for output_name in output_node_names:
+ node_reference_count[output_name] += 1
+
new_ops = []
for node in input_graph_def.node:
@@ -373,20 +382,31 @@ def fuse_resize_and_conv(input_graph_def):
if input_op.op == "MirrorPad":
mirror_pad_op = input_op
resize_op = node_from_map(input_node_map, mirror_pad_op.input[0])
+ if resize_op.op != "ResizeBilinear":
+ resize_op = None
else:
mirror_pad_op = None
- resize_op = input_op
+ if input_op.op == "ResizeBilinear":
+ resize_op = input_op
+ else:
+ resize_op = None
- if resize_op.op != "ResizeBilinear":
+ # There are no ops to be fused into the conv, so skip replacing this one.
+ if not mirror_pad_op and not resize_op:
continue
- nodes_to_skip[conv_op.name] = True
+ # We're replacing this node, so make sure the old one is removed.
+ node_reference_count[conv_op.name] = 0
if mirror_pad_op:
- nodes_to_skip[mirror_pad_op.name] = True
- nodes_to_skip[resize_op.name] = True
+ node_reference_count[mirror_pad_op.name] -= 1
+ if resize_op:
+ node_reference_count[resize_op.name] -= 1
fused_conv_op = tf.NodeDef()
- fused_conv_op.op = "FusedResizeAndPadConv2D"
+ if resize_op:
+ fused_conv_op.op = "FusedResizeAndPadConv2D"
+ else:
+ fused_conv_op.op = "FusedPadConv2D"
fused_conv_op.name = conv_op.name
if mirror_pad_op:
mirror_paddings_name = mirror_pad_op.input[1]
@@ -405,11 +425,15 @@ def fuse_resize_and_conv(input_graph_def):
new_ops.extend([paddings_op])
mirror_paddings_name = paddings_op.name
mirror_paddings_mode = tf.AttrValue(s=b"REFLECT")
- fused_conv_op.input.extend([resize_op.input[0], resize_op.input[1],
- mirror_paddings_name, conv_op.input[1]])
+ if resize_op:
+ fused_conv_op.input.extend([resize_op.input[0], resize_op.input[1],
+ mirror_paddings_name, conv_op.input[1]])
+ fused_conv_op.attr["resize_align_corners"].CopyFrom(
+ resize_op.attr["align_corners"])
+ else:
+ fused_conv_op.input.extend([mirror_pad_op.input[0], mirror_paddings_name,
+ conv_op.input[1]])
fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
- fused_conv_op.attr["resize_align_corners"].CopyFrom(
- resize_op.attr["align_corners"])
fused_conv_op.attr["mode"].CopyFrom(mirror_paddings_mode)
fused_conv_op.attr["strides"].CopyFrom(conv_op.attr["strides"])
fused_conv_op.attr["padding"].CopyFrom(conv_op.attr["padding"])
@@ -417,7 +441,7 @@ def fuse_resize_and_conv(input_graph_def):
result_graph_def = tf.GraphDef()
for node in input_graph_def.node:
- if node.name in nodes_to_skip:
+ if node_reference_count[node.name] < 1:
continue
new_node = tf.NodeDef()
new_node.CopyFrom(node)
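
[Editor's note] Callers of the pass must now supply the graph's output node names, which seed the reference counts so that only nodes no longer referenced are dropped. A self-contained usage sketch, mirroring the updated tests below:

```python
import numpy as np
import tensorflow as tf
from tensorflow.python.tools import optimize_for_inference_lib

inp = tf.constant(np.ones([1, 4, 4, 1], dtype=np.float32))
padded = tf.pad(inp, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='REFLECT')
weights = tf.constant(np.ones([2, 2, 1, 1], dtype=np.float32))
tf.nn.conv2d(padded, weights, [1, 1, 1, 1], padding='VALID', name='output')

# Everything still reachable from "output" survives the rewrite; the
# MirrorPad/Conv2D pair is replaced by a single FusedPadConv2D node.
optimized = optimize_for_inference_lib.fuse_resize_and_conv(
    tf.get_default_graph().as_graph_def(), ['output'])
```
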
diff --git a/tensorflow/python/tools/optimize_for_inference_test.py b/tensorflow/python/tools/optimize_for_inference_test.py
index d92d7ab8c7..57a90fbfe0 100644
--- a/tensorflow/python/tools/optimize_for_inference_test.py
+++ b/tensorflow/python/tools/optimize_for_inference_test.py
@@ -54,6 +54,7 @@ class OptimizeForInferenceTest(tf.test.TestCase):
shape=shape)))
def testOptimizeForInference(self):
+ self.maxDiff = 1000
unused_constant_name = "unused_constant"
unconnected_add_name = "unconnected_add"
a_constant_name = "a_constant"
@@ -183,7 +184,7 @@ class OptimizeForInferenceTest(tf.test.TestCase):
original_graph_def = sess.graph_def
original_result = sess.run(["output:0"])
optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
- original_graph_def)
+ original_graph_def, ["output"])
with self.test_session() as sess:
_ = tf.import_graph_def(optimized_graph_def, input_map={},
@@ -212,7 +213,7 @@ class OptimizeForInferenceTest(tf.test.TestCase):
original_graph_def = sess.graph_def
original_result = sess.run(["output:0"])
optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
- original_graph_def)
+ original_graph_def, ["output"])
with self.test_session() as sess:
_ = tf.import_graph_def(optimized_graph_def, input_map={},
@@ -225,6 +226,34 @@ class OptimizeForInferenceTest(tf.test.TestCase):
self.assertNotEqual("Conv2D", node.op)
self.assertNotEqual("ResizeBilinear", node.op)
+ def testFusePadAndConv(self):
+ with self.test_session() as sess:
+ inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
+ input_op = tf.constant(np.array(inputs), shape=[1, 2, 3, 2],
+ dtype=tf.float32)
+ pad_op = tf.pad(input_op, [[0, 0], [1, 1], [2, 2], [0, 0]],
+ mode="REFLECT")
+ weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4]
+ weights_op = tf.constant(np.array(weights), shape=[1, 2, 2, 2],
+ dtype=tf.float32)
+ tf.nn.conv2d(pad_op, weights_op, [1, 1, 1, 1],
+ padding="VALID", name="output")
+ original_graph_def = sess.graph_def
+ original_result = sess.run(["output:0"])
+ optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
+ original_graph_def, ["output"])
+
+ with self.test_session() as sess:
+ _ = tf.import_graph_def(optimized_graph_def, input_map={},
+ name="optimized")
+ optimized_result = sess.run(["optimized/output:0"])
+
+ self.assertAllClose(original_result, optimized_result)
+
+ for node in optimized_graph_def.node:
+ self.assertNotEqual("Conv2D", node.op)
+ self.assertNotEqual("MirrorPad", node.op)
+
if __name__ == "__main__":
tf.test.main()
diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh
index 2a8aeddc44..6f65bbc056 100755
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@@ -67,7 +67,6 @@ if [ "$#" -lt 1 ] || [ ! -e "${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}" ]; then
exit 1
fi
-
# Optional arguments - environment variables. For example:
# CI_DOCKER_EXTRA_PARAMS='-it --rm' CI_COMMAND_PREFIX='' tensorflow/tools/ci_build/ci_build.sh CPU /bin/bash
CI_TENSORFLOW_SUBMODULE_PATH="${CI_TENSORFLOW_SUBMODULE_PATH:-.}"
@@ -79,6 +78,11 @@ if [[ ! -z "${TF_BUILD_DISABLE_GCP}" ]] &&
CI_COMMAND_PREFIX+=("--disable-gcp")
fi
+# cmake (CPU) builds do not require configuration.
+if [[ "${CONTAINER_TYPE}" == "cmake" ]]; then
+ CI_COMMAND_PREFIX=""
+fi
+
# Helper function to traverse directories up until given file is found.
function upsearch () {
test / == "$PWD" && return || \
diff --git a/tensorflow/tools/ci_build/install/install_proto3.sh b/tensorflow/tools/ci_build/install/install_proto3.sh
index 2f1b7dd175..297acc49ee 100755
--- a/tensorflow/tools/ci_build/install/install_proto3.sh
+++ b/tensorflow/tools/ci_build/install/install_proto3.sh
@@ -19,7 +19,7 @@ set -e
# Install protobuf3.
# Select protobuf version.
-PROTOBUF_VERSION="3.0.0"
+PROTOBUF_VERSION="3.1.0"
PROTOBUF_URL="https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip"
PROTOBUF_ZIP=$(basename "${PROTOBUF_URL}")
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 7228197eb6..54d8e61532 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -31,7 +31,7 @@ _VERSION = '0.10.0'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
'six >= 1.10.0',
- 'protobuf == 3.0.0',
+ 'protobuf == 3.1.0',
]
# python3 requires wheel 0.26
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 5bd812e370..c856ce09fe 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -11,11 +11,17 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
if tf_repo_name:
print("tf_repo_name was specified to tf_workspace but is no longer used and will be removed in the future.")
+ # These lines need to be changed when updating Eigen. They are parsed from
+ # this file by the cmake and make builds to determine the eigen version and
+ # hash.
+ eigen_version = "c78d757b69d3"
+ eigen_sha256 = "dfb650e20a0dee6172dcc99796210a07e40af61348497503b42dc12935b4e6f5"
+
native.new_http_archive(
name = "eigen_archive",
- url = "http://bitbucket.org/eigen/eigen/get/c78d757b69d3.tar.gz",
- sha256 = "dfb650e20a0dee6172dcc99796210a07e40af61348497503b42dc12935b4e6f5",
- strip_prefix = "eigen-eigen-c78d757b69d3",
+ url = "http://bitbucket.org/eigen/eigen/get/" + eigen_version + ".tar.gz",
+ sha256 = eigen_sha256,
+ strip_prefix = "eigen-eigen-" + eigen_version,
build_file = str(Label("//:eigen.BUILD")),
)
@@ -35,9 +41,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
native.new_http_archive(
name = "farmhash_archive",
- url = "http://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip",
- sha256 = "e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28",
- strip_prefix = "farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/src",
+ url = "http://github.com/google/farmhash/archive/71a777924015693c69bc3c8c6492fb8d5372c636.zip",
+ sha256 = "99190108fb96a5e38e183f6a23fb7742948214fc96a746a50c79eb09a255a298",
+ strip_prefix = "farmhash-71a777924015693c69bc3c8c6492fb8d5372c636/src",
build_file = str(Label("//:farmhash.BUILD")),
)
@@ -92,9 +98,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
native.http_archive(
name = "protobuf",
- url = "http://github.com/google/protobuf/archive/v3.0.2.tar.gz",
- sha256 = "b700647e11556b643ccddffd1f41d8cb7704ed02090af54cc517d44d912d11c1",
- strip_prefix = "protobuf-3.0.2",
+ url = "http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
+ sha256 = "0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
+ strip_prefix = "protobuf-3.1.0",
)
native.new_http_archive(
diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl
index 7c9c8ab884..5dc24f7b60 100644
--- a/third_party/gpus/crosstool/BUILD.tpl
+++ b/third_party/gpus/crosstool/BUILD.tpl
@@ -2,10 +2,12 @@ licenses(["restricted"])
package(default_visibility = ["//visibility:public"])
-filegroup(
- name = "crosstool",
- srcs = ["CROSSTOOL"],
- output_licenses = ["unencumbered"],
+cc_toolchain_suite(
+ name = "toolchain",
+ toolchains = {
+ "local|compiler": ":cc-compiler-local",
+ "darwin|compiler": ":cc-compiler-darwin",
+ },
)
cc_toolchain(
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 06ae39e8b9..31bf8cc3d8 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -331,6 +331,33 @@ def _file(repository_ctx, label):
{})
+_DUMMY_CROSSTOOL_BZL_FILE = """
+def error_gpu_disabled():
+ fail("ERROR: Building with --config=cuda but TensorFlow is not configured " +
+ "to build with GPU support. Please re-run ./configure and enter 'Y' " +
+ "at the prompt to build with GPU support.")
+
+ native.genrule(
+ name = "error_gen_crosstool",
+ outs = ["CROSSTOOL"],
+ cmd = "echo 'Should not be run.' && exit 1",
+ )
+
+ native.filegroup(
+ name = "crosstool",
+ srcs = [":CROSSTOOL"],
+ output_licenses = ["unencumbered"],
+ )
+"""
+
+
+_DUMMY_CROSSTOOL_BUILD_FILE = """
+load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
+
+error_gpu_disabled()
+"""
+
+
def _create_dummy_repository(repository_ctx):
cpu_value = _cpu_value(repository_ctx)
symlink_files = _cuda_symlink_files(cpu_value, _DEFAULT_CUDA_VERSION,
@@ -371,6 +398,12 @@ def _create_dummy_repository(repository_ctx):
for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES]),
})
+ # If cuda_configure is not configured to build with GPU support, and the user
+ # attempts to build with --config=cuda, add a dummy build rule to intercept
+ # this and fail with an actionable error message.
+ repository_ctx.file("crosstool/error_gpu_disabled.bzl",
+ _DUMMY_CROSSTOOL_BZL_FILE)
+ repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
def _symlink_dir(repository_ctx, src_dir, dest_dir):
"""Symlinks all the files in a directory.
diff --git a/tools/bazel.rc.template b/tools/bazel.rc.template
index 9a69cac1f6..58dd7434a8 100644
--- a/tools/bazel.rc.template
+++ b/tools/bazel.rc.template
@@ -1,4 +1,4 @@
-build:cuda --crosstool_top=@local_config_cuda//crosstool
+build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
build --force_python=py$PYTHON_MAJOR_VERSION