Moves CUDA-only condition of NCCL further up to dependent targets.

Removes NCCL kernel registration in non-CUDA builds (but retains NCCL ops). Removes unused python/ops/_nccl_ops.so target.

PiperOrigin-RevId: 181736230
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-01-12 06:15:05 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-01-12 06:18:51 -0800
commit: 0bb22b8d777ff8397c93efd10fa62fe9b46081c0 (patch)
tree: def00e5dd20b21e6431783113369f5a49fa97eb9 /tensorflow/contrib/nccl
parent: 4149938e5e1aa72d794caa127fdcd741a6abf90d (diff)
2 files changed, 29 insertions, 25 deletions
diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD
index df9dbb457a..5ac96007df 100644
--- a/tensorflow/contrib/nccl/BUILD
+++ b/tensorflow/contrib/nccl/BUILD
@@ -23,15 +23,17 @@ load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 tf_custom_op_library(
     name = "python/ops/_nccl_ops.so",
     srcs = [
+        "ops/nccl_ops.cc",
+    ],
+    gpu_srcs = [
         "kernels/nccl_manager.cc",
         "kernels/nccl_manager.h",
         "kernels/nccl_ops.cc",
-        "ops/nccl_ops.cc",
     ],
-    deps = [
-        "//tensorflow/core:gpu_headers_lib",
+    deps = if_cuda([
         "@nccl_archive//:nccl",
-    ],
+        "//tensorflow/core:gpu_headers_lib",
+    ]),
 )
 
 tf_cuda_cc_test(
@@ -52,17 +54,14 @@ tf_cuda_cc_test(
         "no_oss",
         "notap",
     ],
-    deps = if_cuda(
+    deps =
         [
-            "@nccl_archive//:nccl",
             "//tensorflow/core:cuda",
+            "//tensorflow/core:test",
+            "//tensorflow/core:test_main",
+            "//tensorflow/core:testlib",
+            "@nccl_archive//:nccl",
         ],
-        [],
-    ) + [
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ],
 )
 
 tf_kernel_library(
@@ -103,11 +102,8 @@ tf_custom_op_py_library(
         "__init__.py",
         "python/ops/nccl_ops.py",
     ],
-    dso = [
-        ":python/ops/_nccl_ops.so",
-    ],
-    kernels = [
-        ":nccl_kernels",
+    dso = [":python/ops/_nccl_ops.so"],
+    kernels = if_cuda([":nccl_kernels"]) + [
         ":nccl_ops_op_lib",
     ],
     srcs_version = "PY2AND3",
diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
index bad0abd44c..436ebf1da7 100644
--- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
+++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
@@ -77,10 +77,6 @@ class NcclTestCase(test.TestCase):
       # same communicator across multiple sessions.
       with self.test_session(use_gpu=True) as sess:
 
-        # Check GPU availability *after* creating test session, see b/68975239.
-        if not test.is_gpu_available():
-          return  # Test requires access to a GPU
-
         for devices in device_sets:
           shape = (3, 4)
           random = (np.random.random_sample(shape) - .5) * 1024
@@ -100,6 +96,11 @@ class NcclTestCase(test.TestCase):
 
           result_tensors = [array_ops.identity(t) for t in reduce_tensors]
 
+          # Check GPU availability *after* creating session, see b/68975239.
+          if not test.is_gpu_available():
+            # If no GPU is available, only test graph construction.
+            continue
+
           # Test execution and results.
           for t in sess.run(result_tensors):
             self.assertAllClose(t, np_ans)
@@ -114,6 +115,7 @@ class NcclTestCase(test.TestCase):
       numpy_fn: A function taking two tensors and returning the gradient of the
           reduction of the two.
     """
+
     def _Gradient(tensors, devices):
       inputs = [array_ops.placeholder(t.dtype, t.shape) for t in tensors]
       reduce_tensors = nccl_reduce(inputs, devices)
@@ -164,12 +166,18 @@ class BroadcastTest(NcclTestCase):
                (['/device:GPU:0', '/device:GPU:0'],))
 
   def testBroadcastToCpuError(self):
-    # Broadcasts to CPU is not supported.
-    with self.assertRaisesRegexp(
-        errors.NotFoundError,
-        "No registered '_NcclBroadcastRecv' OpKernel for CPU devices"):
+    try:
+      # Broadcasts to CPU is not supported.
       self._Test(_NcclBroadcast, lambda x, y: x,
                  (['/device:GPU:0', '/device:CPU:0'],))
+    except errors.NotFoundError as e:
+      self.assertRegexpMatches(
+          e.value,
+          "No registered '_NcclBroadcastRecv' OpKernel for CPU devices")
+    else:
+      # Session isn't executed when no GPU is available.
+      if test.is_gpu_available():
+        self.fail("Didn't raise NotFoundError trying to broadcast to CPU")
 
 
 class CombinedTest(NcclTestCase):
author	A. Unique TensorFlower <gardener@tensorflow.org>	2018-01-12 06:15:05 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-01-12 06:18:51 -0800
commit	0bb22b8d777ff8397c93efd10fa62fe9b46081c0 (patch)
tree	def00e5dd20b21e6431783113369f5a49fa97eb9 /tensorflow/contrib/nccl
parent	4149938e5e1aa72d794caa127fdcd741a6abf90d (diff)