Merge changes from github.

Change: 152200430
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-04-04 16:10:08 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-04-04 17:24:57 -0700
commit: ccbc8991db3943ef984405881a1c917c530f902f (patch)
tree: a7b5c760155bfa4ff95ffc0ebd3823c649668997 /third_party
parent: 9477900946f923cb43ed76ed215490d01474bfe7 (diff)
2 files changed, 29 insertions, 16 deletions
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index bbe0442eaf..05ff584be0 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -39,6 +39,11 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
 # BEGIN cc_configure common functions.
 def find_cc(repository_ctx):
   """Find the C++ compiler."""
+  # On Windows, GPU builds use Bazel's MSVC CROSSTOOL, so GCC detection does
+  # not apply; return a dummy value here to avoid a detection error.
+  if _cpu_value(repository_ctx) == "Windows":
+    return "/use/--config x64_windows_msvc/instead"
+
   if _use_cuda_clang(repository_ctx):
     target_cc_name = "clang"
     cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
@@ -297,7 +302,7 @@ def _find_cuda_define(repository_ctx, cudnn_header_dir, define):
   cudnn_h_path = repository_ctx.path("%s/cudnn.h" % cudnn_header_dir)
   if not cudnn_h_path.exists:
     auto_configure_fail("Cannot find cudnn.h at %s" % str(cudnn_h_path))
-  result = repository_ctx.execute(["grep", "-E", define, str(cudnn_h_path)])
+  result = repository_ctx.execute(["grep", "--color=never", "-E", define, str(cudnn_h_path)])
   if result.stderr:
     auto_configure_fail("Error reading %s: %s" %
                         (result.stderr, str(cudnn_h_path)))
@@ -874,6 +879,7 @@ def _cuda_autoconf_impl(repository_ctx):
     _create_cuda_repository(repository_ctx)
 
 
+
 cuda_configure = repository_rule(
     implementation = _cuda_autoconf_impl,
     environ = [
diff --git a/third_party/libxsmm.BUILD b/third_party/libxsmm.BUILD
index 037009c072..53a814b4b8 100644
--- a/third_party/libxsmm.BUILD
+++ b/third_party/libxsmm.BUILD
@@ -12,7 +12,7 @@ libxsmm_interface_arguments = "0 1"
 
 # Arguments to ./scripts/libxsmm_config.py, see that file for detailed description.
 #  ilp64: 0 (no)
-#  big: 0 (no)
+#  big: 1 (yes)
 #  offload: 0 (no)
 #  alignment [b]
 #  prefetch: -1 (auto)
@@ -22,7 +22,7 @@ libxsmm_interface_arguments = "0 1"
 #  flags: 0 (none)
 #  alpha = 1
 #  beta = 1
-libxsmm_config_arguments = "0 0 0 64 -1 0 1 1 0 1 1"
+libxsmm_config_arguments = "0 1 0 64 -1 0 1 1 0 1 1"
 
 # Arguments to ./scripts/libxsmm_dispatch.py, see that file for detailed description.
 #  (dummy argument)
@@ -56,22 +56,26 @@ genrule(
 cc_library(
     name = "xsmm_avx",
     srcs = [
-        "src/libxsmm_main.c",
+        "src/libxsmm_cpuid_x86.c",
+        "src/libxsmm_dnn.c",
+        "src/libxsmm_dnn_convolution_backward.c",
+        "src/libxsmm_dnn_convolution_forward.c",
+        "src/libxsmm_dnn_convolution_weight_update.c",
+        "src/libxsmm_dnn_convolution_winograd_backward.c",
+        "src/libxsmm_dnn_convolution_winograd_forward.c",
+        "src/libxsmm_dnn_convolution_winograd_weight_update.c",
+        "src/libxsmm_dnn_handle.c",
         "src/libxsmm_dump.c",
-        "src/libxsmm_malloc.c",
+        "src/libxsmm_fsspmdm.c",
         "src/libxsmm_gemm.c",
+        "src/libxsmm_main.c",
+        "src/libxsmm_malloc.c",
+        "src/libxsmm_perf.c",
+        "src/libxsmm_spmdm.c",
+        "src/libxsmm_sync.c",
         "src/libxsmm_timer.c",
         "src/libxsmm_trace.c",
         "src/libxsmm_trans.c",
-        "src/libxsmm_sync.c",
-        "src/libxsmm_perf.c",
-        "src/libxsmm_spmdm.c",
-        "src/libxsmm_dnn.c",
-        "src/libxsmm_dnn_handle.c",
-        "src/libxsmm_dnn_convolution_forward.c",
-        "src/libxsmm_dnn_convolution_backward.c",
-        "src/libxsmm_dnn_convolution_weight_update.c",
-        "src/libxsmm_cpuid_x86.c",
     ] + glob([
         "src/generator_*.c",
     ]),
@@ -79,6 +83,7 @@ cc_library(
         "include/libxsmm_cpuid.h",
         "include/libxsmm_dnn.h",
         "include/libxsmm_frontend.h",
+        "include/libxsmm_fsspmdm.h",
         "include/libxsmm_generator.h",
         "include/libxsmm_intrinsics_x86.h",
         "include/libxsmm_macros.h",
@@ -91,14 +96,16 @@ cc_library(
         "include/libxsmm.h",
         "include/libxsmm_config.h",
         "include/libxsmm_dispatch.h",
-    ],
+    ] + glob([
+        # trigger rebuild if template changed
+        "src/template/*.c",
+    ]),
     copts = [
         "-mavx",  # JIT does not work without avx anyway, and this silences some CRC32 warnings.
         "-Wno-vla",  # Libxsmm convolutions heavily use VLA.
     ],
     defines = [
         "LIBXSMM_BUILD",
-        "LIBXSMM_CPUID_X86_NOINLINE",
         "__BLAS=0",
     ],
     includes = [
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-04-04 16:10:08 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-04-04 17:24:57 -0700
commit	ccbc8991db3943ef984405881a1c917c530f902f (patch)
tree	a7b5c760155bfa4ff95ffc0ebd3823c649668997 /third_party
parent	9477900946f923cb43ed76ed215490d01474bfe7 (diff)