about summary refs log tree commit diff homepage
path: root/tensorflow/contrib/nccl
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-01-12 06:15:05 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-01-12 06:18:51 -0800
commit: 0bb22b8d777ff8397c93efd10fa62fe9b46081c0 (patch)
tree: def00e5dd20b21e6431783113369f5a49fa97eb9 /tensorflow/contrib/nccl
parent: 4149938e5e1aa72d794caa127fdcd741a6abf90d (diff)
Moves CUDA-only condition of NCCL further up to dependent targets.
Removes NCCL kernel registration in non-CUDA builds (but retains NCCL ops). Removes unused python/ops/_nccl_ops.so target. PiperOrigin-RevId: 181736230
Diffstat (limited to 'tensorflow/contrib/nccl')
-rw-r--r--  tensorflow/contrib/nccl/BUILD  30
-rw-r--r--  tensorflow/contrib/nccl/python/ops/nccl_ops_test.py  24
2 files changed, 29 insertions, 25 deletions
diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD
index df9dbb457a..5ac96007df 100644
--- a/tensorflow/contrib/nccl/BUILD
+++ b/tensorflow/contrib/nccl/BUILD
@@ -23,15 +23,17 @@ load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
tf_custom_op_library(
name = "python/ops/_nccl_ops.so",
srcs = [
+ "ops/nccl_ops.cc",
+ ],
+ gpu_srcs = [
"kernels/nccl_manager.cc",
"kernels/nccl_manager.h",
"kernels/nccl_ops.cc",
- "ops/nccl_ops.cc",
],
- deps = [
- "//tensorflow/core:gpu_headers_lib",
+ deps = if_cuda([
"@nccl_archive//:nccl",
- ],
+ "//tensorflow/core:gpu_headers_lib",
+ ]),
)
tf_cuda_cc_test(
@@ -52,17 +54,14 @@ tf_cuda_cc_test(
"no_oss",
"notap",
],
- deps = if_cuda(
+ deps =
[
- "@nccl_archive//:nccl",
"//tensorflow/core:cuda",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ "@nccl_archive//:nccl",
],
- [],
- ) + [
- "//tensorflow/core:test",
- "//tensorflow/core:test_main",
- "//tensorflow/core:testlib",
- ],
)
tf_kernel_library(
@@ -103,11 +102,8 @@ tf_custom_op_py_library(
"__init__.py",
"python/ops/nccl_ops.py",
],
- dso = [
- ":python/ops/_nccl_ops.so",
- ],
- kernels = [
- ":nccl_kernels",
+ dso = [":python/ops/_nccl_ops.so"],
+ kernels = if_cuda([":nccl_kernels"]) + [
":nccl_ops_op_lib",
],
srcs_version = "PY2AND3",
diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
index bad0abd44c..436ebf1da7 100644
--- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
+++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
@@ -77,10 +77,6 @@ class NcclTestCase(test.TestCase):
# same communicator across multiple sessions.
with self.test_session(use_gpu=True) as sess:
- # Check GPU availability *after* creating test session, see b/68975239.
- if not test.is_gpu_available():
- return # Test requires access to a GPU
-
for devices in device_sets:
shape = (3, 4)
random = (np.random.random_sample(shape) - .5) * 1024
@@ -100,6 +96,11 @@ class NcclTestCase(test.TestCase):
result_tensors = [array_ops.identity(t) for t in reduce_tensors]
+ # Check GPU availability *after* creating session, see b/68975239.
+ if not test.is_gpu_available():
+ # If no GPU is available, only test graph construction.
+ continue
+
# Test execution and results.
for t in sess.run(result_tensors):
self.assertAllClose(t, np_ans)
@@ -114,6 +115,7 @@ class NcclTestCase(test.TestCase):
numpy_fn: A function taking two tensors and returning the gradient of the
reduction of the two.
"""
+
def _Gradient(tensors, devices):
inputs = [array_ops.placeholder(t.dtype, t.shape) for t in tensors]
reduce_tensors = nccl_reduce(inputs, devices)
@@ -164,12 +166,18 @@ class BroadcastTest(NcclTestCase):
(['/device:GPU:0', '/device:GPU:0'],))
def testBroadcastToCpuError(self):
- # Broadcasts to CPU is not supported.
- with self.assertRaisesRegexp(
- errors.NotFoundError,
- "No registered '_NcclBroadcastRecv' OpKernel for CPU devices"):
+ try:
+ # Broadcasts to CPU is not supported.
self._Test(_NcclBroadcast, lambda x, y: x,
(['/device:GPU:0', '/device:CPU:0'],))
+ except errors.NotFoundError as e:
+ self.assertRegexpMatches(
+ e.value,
+ "No registered '_NcclBroadcastRecv' OpKernel for CPU devices")
+ else:
+ # Session isn't executed when no GPU is available.
+ if test.is_gpu_available():
+ self.fail("Didn't raise NotFoundError trying to broadcast to CPU")
class CombinedTest(NcclTestCase):