# NVIDIA NCCL
# A package of optimized primitives for collective multi-GPU communication.
# Load the CUDA helpers (cuda_default_copts, if_cuda) used by the cc_library
# rule in this file. The load() comes before the other statements, as
# buildifier's load-on-top check expects.
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda")

licenses(["notice"])  # BSD

# Make the license file addressable by label from other packages.
exports_files(["LICENSE.txt"])
# CUDA sources that make up the library, as package-relative paths.
SRCS = [
    "src/all_gather.cu",
    "src/all_reduce.cu",
    "src/broadcast.cu",
    "src/core.cu",
    "src/libwrap.cu",
    "src/reduce.cu",
    "src/reduce_scatter.cu",
]
# The .cu files cannot be listed in cc_library srcs directly, so each one is
# copied to a sibling "<path>.cu.cc" file by its own genrule ("gen_<path>").
[
    genrule(
        name = "gen_%s" % cu_file,
        srcs = [cu_file],
        outs = ["%s.cc" % cu_file],
        cmd = "cp $(location %s) $(location %s.cc)" % (cu_file, cu_file),
    )
    for cu_file in SRCS
]
# Every entry of SRCS with ".cc" appended, in the same order as SRCS.
SRCS_CU_CC = ["%s.cc" % cu_file for cu_file in SRCS]
# Compile flags for the NCCL sources: the CUDA/NCCL version macros (all
# defined as 0 here), an include path into the nccl_archive external
# repository, -O3, followed by whatever cuda_default_copts() returns.
_NCCL_COPTS = [
    "-DCUDA_MAJOR=0",
    "-DCUDA_MINOR=0",
    "-DNCCL_MAJOR=0",
    "-DNCCL_MINOR=0",
    "-DNCCL_PATCH=0",
    "-Iexternal/nccl_archive/src",
    "-O3",
] + cuda_default_copts()

# Link flags selected per target platform.
# NOTE(review): "@%ws%" looks like a placeholder for the workspace name that is
# substituted before Bazel reads this file; confirm against the repository rule.
_NCCL_LINKOPTS = select({
    "@%ws%//tensorflow:android": [
        "-pie",
    ],
    "@%ws%//tensorflow:darwin": [
        "-Wl,-framework",
        "-Wl,CoreFoundation",
        "-Wl,-framework",
        "-Wl,Security",
    ],
    "@%ws%//tensorflow:ios": [],
    "@%ws%//tensorflow:windows": [
        "ws2_32.lib",
    ],
    "//conditions:default": [
        "-lrt",
    ],
})

# The NCCL library target. Sources are the copied .cu.cc files plus every
# header under src/; src/nccl.h is the only header listed in hdrs. Both lists
# are passed through if_cuda().
cc_library(
    name = "nccl",
    srcs = if_cuda(SRCS_CU_CC + glob(["src/*.h"])),
    hdrs = if_cuda(["src/nccl.h"]),
    copts = _NCCL_COPTS,
    linkopts = _NCCL_LINKOPTS,
    visibility = ["//visibility:public"],
    deps = ["@local_config_cuda//cuda:cuda_headers"],
)