diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-01-12 06:15:05 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-01-12 06:18:51 -0800 |
commit | 0bb22b8d777ff8397c93efd10fa62fe9b46081c0 (patch) | |
tree | def00e5dd20b21e6431783113369f5a49fa97eb9 /tensorflow/contrib/nccl | |
parent | 4149938e5e1aa72d794caa127fdcd741a6abf90d (diff) |
Moves CUDA-only condition of NCCL further up to dependent targets.
Removes NCCL kernel registration in non-CUDA builds (but retains NCCL ops).
Removes unused python/ops/_nccl_ops.so target.
PiperOrigin-RevId: 181736230
Diffstat (limited to 'tensorflow/contrib/nccl')
-rw-r--r-- | tensorflow/contrib/nccl/BUILD | 30 |
-rw-r--r-- | tensorflow/contrib/nccl/python/ops/nccl_ops_test.py | 24 |
2 files changed, 29 insertions, 25 deletions
diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index df9dbb457a..5ac96007df 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -23,15 +23,17 @@ load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") tf_custom_op_library( name = "python/ops/_nccl_ops.so", srcs = [ + "ops/nccl_ops.cc", + ], + gpu_srcs = [ "kernels/nccl_manager.cc", "kernels/nccl_manager.h", "kernels/nccl_ops.cc", - "ops/nccl_ops.cc", ], - deps = [ - "//tensorflow/core:gpu_headers_lib", + deps = if_cuda([ "@nccl_archive//:nccl", - ], + "//tensorflow/core:gpu_headers_lib", + ]), ) tf_cuda_cc_test( @@ -52,17 +54,14 @@ tf_cuda_cc_test( "no_oss", "notap", ], - deps = if_cuda( + deps = [ - "@nccl_archive//:nccl", "//tensorflow/core:cuda", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "@nccl_archive//:nccl", ], - [], - ) + [ - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], ) tf_kernel_library( @@ -103,11 +102,8 @@ tf_custom_op_py_library( "__init__.py", "python/ops/nccl_ops.py", ], - dso = [ - ":python/ops/_nccl_ops.so", - ], - kernels = [ - ":nccl_kernels", + dso = [":python/ops/_nccl_ops.so"], + kernels = if_cuda([":nccl_kernels"]) + [ ":nccl_ops_op_lib", ], srcs_version = "PY2AND3", diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index bad0abd44c..436ebf1da7 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -77,10 +77,6 @@ class NcclTestCase(test.TestCase): # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: - # Check GPU availability *after* creating test session, see b/68975239. 
- if not test.is_gpu_available(): - return # Test requires access to a GPU - for devices in device_sets: shape = (3, 4) random = (np.random.random_sample(shape) - .5) * 1024 @@ -100,6 +96,11 @@ class NcclTestCase(test.TestCase): result_tensors = [array_ops.identity(t) for t in reduce_tensors] + # Check GPU availability *after* creating session, see b/68975239. + if not test.is_gpu_available(): + # If no GPU is available, only test graph construction. + continue + # Test execution and results. for t in sess.run(result_tensors): self.assertAllClose(t, np_ans) @@ -114,6 +115,7 @@ class NcclTestCase(test.TestCase): numpy_fn: A function taking two tensors and returning the gradient of the reduction of the two. """ + def _Gradient(tensors, devices): inputs = [array_ops.placeholder(t.dtype, t.shape) for t in tensors] reduce_tensors = nccl_reduce(inputs, devices) @@ -164,12 +166,18 @@ class BroadcastTest(NcclTestCase): (['/device:GPU:0', '/device:GPU:0'],)) def testBroadcastToCpuError(self): - # Broadcasts to CPU is not supported. - with self.assertRaisesRegexp( - errors.NotFoundError, - "No registered '_NcclBroadcastRecv' OpKernel for CPU devices"): + try: + # Broadcasts to CPU is not supported. self._Test(_NcclBroadcast, lambda x, y: x, (['/device:GPU:0', '/device:CPU:0'],)) + except errors.NotFoundError as e: + self.assertRegexpMatches( + e.value, + "No registered '_NcclBroadcastRecv' OpKernel for CPU devices") + else: + # Session isn't executed when no GPU is available. + if test.is_gpu_available(): + self.fail("Didn't raise NotFoundError trying to broadcast to CPU") class CombinedTest(NcclTestCase): |