aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-09-27 10:22:55 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-09-27 10:22:55 -0700
commit62e60166de65d6604b897f2575a5accc86160496 (patch)
tree4f0c3b8d7915de7f328bfeaaa1a0312f7df481a2
parentcd1bdeafecf39bc55409b75cf27cecf273237ca2 (diff)
parent69d3b8faf41791834301a74a05e288964940427d (diff)
Merge pull request #20277 from ROCmSoftwarePlatform:upstream-staging
PiperOrigin-RevId: 214793113
-rw-r--r--configure.py20
-rw-r--r--tensorflow/core/BUILD3
-rw-r--r--tensorflow/tensorflow.bzl52
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.rocm97
-rwxr-xr-xtensorflow/tools/ci_build/builds/docker_test.sh9
-rwxr-xr-xtensorflow/tools/ci_build/builds/pip.sh4
-rwxr-xr-xtensorflow/tools/ci_build/builds/with_the_same_user6
-rwxr-xr-xtensorflow/tools/ci_build/ci_build.sh11
-rwxr-xr-xtensorflow/tools/ci_build/linux/cpu/run_cc_core.sh1
-rwxr-xr-xtensorflow/tools/ci_build/linux/cpu/run_py2_core.sh1
-rwxr-xr-xtensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh1
-rwxr-xr-xtensorflow/tools/ci_build/linux/cpu/run_py3_core.sh1
-rwxr-xr-xtensorflow/tools/ci_build/linux/libtensorflow.sh3
-rwxr-xr-xtensorflow/tools/ci_build/linux/libtensorflow_cpu.sh1
-rwxr-xr-xtensorflow/tools/ci_build/linux/libtensorflow_docker.sh6
-rwxr-xr-xtensorflow/tools/ci_build/linux/libtensorflow_rocm.sh22
-rwxr-xr-xtensorflow/tools/ci_build/linux/rocm/run_cc_core.sh39
-rwxr-xr-xtensorflow/tools/ci_build/linux/rocm/run_py3_core.sh39
-rwxr-xr-xtensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh1
-rwxr-xr-xtensorflow/tools/ci_build/osx/libtensorflow_cpu.sh1
-rwxr-xr-xtensorflow/tools/ci_build/osx/libtensorflow_gpu.sh1
-rwxr-xr-xtensorflow/tools/ci_build/osx/libtensorflow_rocm.sh36
-rwxr-xr-xtensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh41
-rwxr-xr-xtensorflow/workspace.bzl2
-rw-r--r--third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl158
-rwxr-xr-xthird_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl241
-rw-r--r--third_party/gpus/rocm/BUILD0
-rw-r--r--third_party/gpus/rocm/BUILD.tpl99
-rw-r--r--third_party/gpus/rocm/build_defs.bzl.tpl45
-rw-r--r--third_party/gpus/rocm/rocm_config.h.tpl21
-rw-r--r--third_party/gpus/rocm_configure.bzl784
-rw-r--r--tools/bazel.rc3
32 files changed, 1731 insertions, 18 deletions
diff --git a/configure.py b/configure.py
index 3fcaaa9d0e..cc6a654a61 100644
--- a/configure.py
+++ b/configure.py
@@ -1540,6 +1540,13 @@ def main():
else:
set_trisycl_include_dir(environ_cp)
+ set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
+ if (environ_cp.get('TF_NEED_ROCM') == '1' and
+ 'LD_LIBRARY_PATH' in environ_cp and
+ environ_cp.get('LD_LIBRARY_PATH') != '1'):
+ write_action_env_to_bazelrc('LD_LIBRARY_PATH',
+ environ_cp.get('LD_LIBRARY_PATH'))
+
set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
if (environ_cp.get('TF_NEED_CUDA') == '1' and
'TF_CUDA_CONFIG_REPO' not in environ_cp):
@@ -1580,6 +1587,19 @@ def main():
write_to_bazelrc('build --config=download_clang')
write_to_bazelrc('test --config=download_clang')
+ # SYCL / ROCm / CUDA are mutually exclusive.
+ # At most 1 GPU platform can be configured.
+ gpu_platform_count = 0
+ if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
+ gpu_platform_count += 1
+ if environ_cp.get('TF_NEED_ROCM') == '1':
+ gpu_platform_count += 1
+ if environ_cp.get('TF_NEED_CUDA') == '1':
+ gpu_platform_count += 1
+ if gpu_platform_count >= 2:
+ raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
+ 'At most 1 GPU platform can be configured.')
+
set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
if environ_cp.get('TF_NEED_MPI') == '1':
set_mpi_home(environ_cp)
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d575604a56..ca247dc56b 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -149,6 +149,7 @@ load(
"tf_cuda_tests_tags",
)
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
load(
"//third_party/mkl:build_defs.bzl",
@@ -3006,7 +3007,7 @@ tf_cuda_library(
"platform/device_tracer.h",
],
copts = tf_copts(),
- cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
+ cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()),
visibility = ["//visibility:private"],
deps = [
":core_cpu_internal",
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 8f8bfadf78..cad5de1b0c 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -19,6 +19,14 @@ load(
"@local_config_cuda//cuda:build_defs.bzl",
"cuda_default_copts",
"if_cuda",
+ "if_cuda_is_configured",
+)
+load(
+ "@local_config_rocm//rocm:build_defs.bzl",
+ "if_rocm",
+ "if_rocm_is_configured",
+ "rocm_copts",
+ "rocm_default_copts",
)
load(
"//third_party/mkl:build_defs.bzl",
@@ -39,6 +47,8 @@ load(
def register_extension_info(**kwargs):
pass
+# if_cuda_is_configured def placeholder
+
# Given a source file, generate a test name.
# i.e. "common_runtime/direct_session_test.cc" becomes
# "common_runtime_direct_session_test"
@@ -863,12 +873,16 @@ def tf_cuda_only_cc_test(
srcs = srcs + tf_binary_additional_srcs(),
size = size,
args = args,
- copts = _cuda_copts() + tf_copts(),
+ copts = _cuda_copts() + rocm_copts() + tf_copts(),
data = data + tf_binary_dynamic_kernel_dsos(kernels),
- deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([
- clean_dep("//tensorflow/core:cuda"),
- clean_dep("//tensorflow/core:gpu_lib"),
- ]),
+ deps = deps + tf_binary_dynamic_kernel_deps(kernels) +
+ if_cuda_is_configured([
+ clean_dep("//tensorflow/core:cuda"),
+ clean_dep("//tensorflow/core:gpu_lib"),
+ ]) +
+ if_rocm_is_configured([
+ clean_dep("//tensorflow/core:gpu_lib"),
+ ]),
linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
linkstatic = linkstatic or select({
# cc_tests with ".so"s in srcs incorrectly link on Darwin
@@ -1003,7 +1017,7 @@ register_extension_info(
label_regex_for_dep = "{extension_name}",
)
-def _cuda_copts():
+def _cuda_copts(opts = []):
"""Gets the appropriate set of copts for (maybe) CUDA compilation.
If we're doing CUDA compilation, returns copts for our particular CUDA
@@ -1019,13 +1033,17 @@ def _cuda_copts():
"@local_config_cuda//cuda:using_clang": ([
"-fcuda-flush-denormals-to-zero",
]),
- })
+ }) + if_cuda_is_configured(opts)
# Build defs for TensorFlow kernels
# When this target is built using --config=cuda, a cc_library is built
# that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
# libraries needed by GPU kernels.
+#
+# When this target is built using --config=rocm, a cc_library is built
+# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional
+# libraries needed by GPU kernels.
def tf_gpu_kernel_library(
srcs,
copts = [],
@@ -1033,16 +1051,18 @@ def tf_gpu_kernel_library(
deps = [],
hdrs = [],
**kwargs):
- copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
+ copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + rocm_copts(opts = cuda_copts)
kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
native.cc_library(
srcs = srcs,
hdrs = hdrs,
copts = copts,
- deps = deps + if_cuda([
+ deps = deps + if_cuda_is_configured([
clean_dep("//tensorflow/core:cuda"),
clean_dep("//tensorflow/core:gpu_lib"),
+ ]) + if_rocm_is_configured([
+ clean_dep("//tensorflow/core:gpu_lib"),
]),
alwayslink = 1,
**kwargs
@@ -1081,8 +1101,10 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)
deps = deps + if_cuda(cuda_deps + [
clean_dep("//tensorflow/core:cuda"),
"@local_config_cuda//cuda:cuda_headers",
+ ]) + if_rocm_is_configured(cuda_deps + [
+ # rocm_header placeholder
]),
- copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
+ copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
if_enable_mkl(["-DENABLE_MKL"]) +
if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
@@ -1465,6 +1487,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
"@local_config_cuda//cuda:cuda_headers",
"@local_config_cuda//cuda:cudart_static",
]
+ rocm_deps = [
+ clean_dep("//tensorflow/core:stream_executor_headers_lib"),
+ ]
deps = deps + tf_custom_op_library_additional_deps()
if gpu_srcs:
basename = name.split(".")[0]
@@ -1473,13 +1498,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
srcs = gpu_srcs,
copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
features = if_cuda(["-use_header_modules"]),
- deps = deps + if_cuda(cuda_deps),
+ deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
)
cuda_deps.extend([":" + basename + "_gpu"])
+ rocm_deps.extend([":" + basename + "_gpu"])
check_deps(
name = name + "_check_deps",
- deps = deps + if_cuda(cuda_deps),
+ deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
disallowed_deps = [
clean_dep("//tensorflow/core:framework"),
clean_dep("//tensorflow/core:lib"),
@@ -1488,7 +1514,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
tf_cc_shared_object(
name = name,
srcs = srcs,
- deps = deps + if_cuda(cuda_deps),
+ deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
data = if_static([name + "_check_deps"]),
copts = tf_copts(is_external = True),
features = ["windows_export_all_symbols"],
diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
new file mode 100644
index 0000000000..aadaa8bac1
--- /dev/null
+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
@@ -0,0 +1,97 @@
+# This Dockerfile provides a starting point for a ROCm installation of
+# MIOpen and tensorflow.
+FROM ubuntu:xenial
+MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
+
+ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/
+ARG ROCM_PATH=/opt/rocm
+
+ENV DEBIAN_FRONTEND noninteractive
+ENV TF_NEED_ROCM 1
+ENV HOME /root/
+RUN apt update && apt install -y wget software-properties-common
+
+# Add rocm repository
+RUN apt-get clean all
+RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
+RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
+
+# Install misc pkgs
+RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+ build-essential \
+ clang-3.8 \
+ clang-format-3.8 \
+ clang-tidy-3.8 \
+ cmake \
+ cmake-qt-gui \
+ ssh \
+ curl \
+ apt-utils \
+ pkg-config \
+ g++-multilib \
+ git \
+ libunwind-dev \
+ libfftw3-dev \
+ libelf-dev \
+ libncurses5-dev \
+ libpthread-stubs0-dev \
+ vim \
+ gfortran \
+ libboost-program-options-dev \
+ libssl-dev \
+ libboost-dev \
+ libboost-system-dev \
+ libboost-filesystem-dev \
+ rpm \
+ libnuma-dev \
+ virtualenv \
+ python-pip \
+ python3-pip \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Install rocm pkgs
+RUN apt-get update --allow-insecure-repositories && \
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
+ rocm-dev rocm-libs rocm-utils \
+ rocfft miopen-hip miopengemm rocblas hipblas rocrand \
+ rocm-profiler cxlactivitylogger && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
+RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb
+
+ENV HCC_HOME=$ROCM_PATH/hcc
+ENV HIP_PATH=$ROCM_PATH/hip
+ENV OPENCL_ROOT=$ROCM_PATH/opencl
+ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
+ENV PATH="$ROCM_PATH/bin:${PATH}"
+ENV PATH="$OPENCL_ROOT/bin:${PATH}"
+
+# Add target file to help determine which device(s) to build for
+RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst
+
+# Setup environment variables, and add those environment variables at the end of ~/.bashrc
+ARG HCC_HOME=/opt/rocm/hcc
+ARG HIP_PATH=/opt/rocm/hip
+ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH
+
+# Copy and run the install scripts.
+COPY install/*.sh /install/
+ARG DEBIAN_FRONTEND=noninteractive
+RUN /install/install_bootstrap_deb_packages.sh
+RUN add-apt-repository -y ppa:openjdk-r/ppa && \
+ add-apt-repository -y ppa:george-edison55/cmake-3.x
+RUN /install/install_deb_packages.sh
+RUN /install/install_pip_packages.sh
+RUN /install/install_bazel.sh
+RUN /install/install_golang.sh
+
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
+
+# Configure the build for our CUDA configuration.
+ENV TF_NEED_ROCM 1
+
diff --git a/tensorflow/tools/ci_build/builds/docker_test.sh b/tensorflow/tools/ci_build/builds/docker_test.sh
index e337ea4b05..38891b60e5 100755
--- a/tensorflow/tools/ci_build/builds/docker_test.sh
+++ b/tensorflow/tools/ci_build/builds/docker_test.sh
@@ -19,7 +19,7 @@
#
# Usage: docker_test.sh <IMAGE_TYPE> <TAG> <WHL_PATH>
# Arguments:
-# IMAGE_TYPE : Type of the image: (CPU|GPU)
+# IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM)
# TAG : Docker image tag
# WHL_PATH : Path to the whl file to be installed inside the docker image
#
@@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile"
elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu"
+elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then
+ DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm"
else
die "Unrecognized image type: $1"
fi
@@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then
devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
GPU_EXTRA_PARAMS="${devices} ${libs}"
+elif [ "${IMAGE_TYPE}" == "rocm" ]; then
+ ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
else
GPU_EXTRA_PARAMS=""
+ ROCM_EXTRA_PARAMS=""
fi
# Run docker image with source directory mapped
docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \
-${GPU_EXTRA_PARAMS} \
+${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \
"${DOCKER_IMG_TAG}" \
/bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\
"tensorflow/tools/ci_build/builds/test_tutorials.sh && "\
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index fef121ab5a..6543779022 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}"
PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
GPU_FLAG=""
if [[ ${CONTAINER_TYPE} == "cpu" ]] || \
+ [[ ${CONTAINER_TYPE} == "rocm" ]] || \
[[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then
bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \
die "Build failed."
@@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then
die "ERROR: Cannot find repaired wheel."
fi
# Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
- elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then
+ elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \
+ [[ ${CONTAINER_TYPE} == "rocm" ]]; then
WHL_PATH=${AUDITED_WHL_NAME}
cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH}
echo "Copied manylinx1 wheel file at ${WHL_PATH}"
diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user
index b216e3549f..1cc5aed15d 100755
--- a/tensorflow/tools/ci_build/builds/with_the_same_user
+++ b/tensorflow/tools/ci_build/builds/with_the_same_user
@@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
usermod -a -G sudo "${CI_BUILD_USER}"
echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
+if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then
+ # ROCm requires the video group in order to use the GPU for compute. If it
+ # exists on the host, add it to the container.
+ getent group video || addgroup video && adduser "${CI_BUILD_USER}" video
+fi
+
if [ -e /root/.bazelrc ]; then
cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc"
diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh
index 77265e0f50..eab0616513 100755
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@@ -18,7 +18,7 @@
# <COMMAND>
#
# CONTAINER_TYPE: Type of the docker container used the run the build:
-# e.g., (cpu | gpu | android | tensorboard)
+# e.g., (cpu | gpu | rocm | android | tensorboard)
#
# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build.
# If this optional value is not supplied (via the
@@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then
GPU_EXTRA_PARAMS=""
fi
+# Add extra params for rocm devices and libraries for ROCm container.
+if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then
+ ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
+else
+ ROCM_EXTRA_PARAMS=""
+fi
+
+
# Determine the docker image name
DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}"
@@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
-v ${WORKSPACE}:/workspace \
-w /workspace \
${GPU_EXTRA_PARAMS} \
+ ${ROCM_EXTRA_PARAMS} \
${CI_DOCKER_EXTRA_PARAMS[@]} \
"${DOCKER_IMG_NAME}" \
${CI_COMMAND_PREFIX[@]} \
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
index 8eeddcdb82..3b5c92d148 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
@@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
# Only running cc tests, python version does not matter.
export PYTHON_BIN_PATH=`which python`
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
index 8eca1987f0..52eff6330f 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
@@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=`which python2`
yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
index f6fa9251d4..d12027599a 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
@@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=`which python3`
yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
index 51eb2cd7e6..7c531a4d68 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
@@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=`which python3`
yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow.sh b/tensorflow/tools/ci_build/linux/libtensorflow.sh
index beef8e063b..3b6e15feb9 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow.sh
@@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-"
if [ "${TF_NEED_CUDA}" == "1" ]; then
SUFFIX="-gpu-linux-"
fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+ SUFFIX="-rocm-linux-"
+fi
build_libtensorflow_tarball "${SUFFIX}$(uname -m)"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
index 4bf34dd299..b76262b6e9 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
@@ -19,4 +19,5 @@
set -ex
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
"${SCRIPT_DIR}/libtensorflow_docker.sh"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
index 60c974c36b..467b8dc808 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then
DOCKER_BINARY="nvidia-docker"
DOCKER_FILE="Dockerfile.gpu"
fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+ DOCKER_IMAGE="tf-tensorflow-rocm"
+ DOCKER_BINARY="docker"
+ DOCKER_FILE="Dockerfile.rocm"
+fi
docker build \
-t "${DOCKER_IMAGE}" \
@@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \
-e "TF_NEED_HDFS=0" \
-e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
-e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \
+ -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \
-e "TF_NEED_OPENCL_SYCL=0" \
"${DOCKER_IMAGE}" \
"/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
new file mode 100755
index 0000000000..c1ebbe3630
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to build a binary releases of libtensorflow with GPU support.
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+export TF_NEED_ROCM=1
+"${SCRIPT_DIR}/libtensorflow_docker.sh"
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
new file mode 100755
index 0000000000..200089f90e
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+ --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+ --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
new file mode 100755
index 0000000000..1d0b838c1b
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+ --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+ --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
index c7cc16e669..adee0d3171 100755
--- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
+++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
@@ -27,6 +27,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=$(which python2)
yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
index 9ae5fc6bea..06798adc03 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
export PYTHON_BIN_PATH="/usr/bin/python"
export TF_NEED_HDFS=0
export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_MKL=0
export COMPUTECPP_PATH="/usr/local"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
index d95fcdeb85..95f1992d7d 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@@ -27,6 +27,7 @@ export TF_NEED_CUDA=1
export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}"
export PYTHON_BIN_PATH="/usr/bin/python"
export TF_NEED_HDFS=0
+export TF_NEED_ROCM=0
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_MKL=0
export COMPUTECPP_PATH="/usr/local"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
new file mode 100755
index 0000000000..aeabc0e39e
--- /dev/null
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to produce binary release of libtensorflow (C API, Java jars etc.).
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# See comments at the top of this file for details.
+source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
+
+# Configure script
+export TF_NEED_ROCM=1
+export PYTHON_BIN_PATH="/usr/bin/python"
+export TF_NEED_GCP=0
+export TF_NEED_HDFS=0
+export TF_NEED_CUDA=0
+export TF_NEED_OPENCL_SYCL=0
+export TF_NEED_MKL=0
+export COMPUTECPP_PATH="/usr/local"
+
+export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
+build_libtensorflow_tarball "-gpu-darwin-$(uname -m)"
diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
new file mode 100755
index 0000000000..a0de128020
--- /dev/null
+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
+
+bazel clean
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \
+ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+ --build_tests_only --test_output=errors --local_test_jobs=1 \
+ --config=xla -- \
+ //tensorflow/compiler/...
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 9128a1aafe..4bf2ff3fb5 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -1,6 +1,7 @@
# TensorFlow external dependencies that can be loaded in WORKSPACE files.
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
+load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure")
load("//third_party:nccl/nccl_configure.bzl", "nccl_configure")
load("//third_party/mkl:build_defs.bzl", "mkl_repository")
@@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
sycl_configure(name = "local_config_sycl")
syslibs_configure(name = "local_config_syslibs")
python_configure(name = "local_config_python")
+ rocm_configure(name = "local_config_rocm")
initialize_third_party()
diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
new file mode 100644
index 0000000000..0e175b3ef6
--- /dev/null
+++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
@@ -0,0 +1,158 @@
+major_version: "local"
+minor_version: ""
+default_target_cpu: "same_as_host"
+
+default_toolchain {
+ cpu: "k8"
+ toolchain_identifier: "local_linux"
+}
+default_toolchain {
+ cpu: "piii"
+ toolchain_identifier: "local_linux"
+}
+default_toolchain {
+ cpu: "arm"
+ toolchain_identifier: "local_linux"
+}
+default_toolchain {
+ cpu: "ppc"
+ toolchain_identifier: "local_linux"
+}
+
+toolchain {
+ abi_version: "local"
+ abi_libc_version: "local"
+ builtin_sysroot: ""
+ compiler: "compiler"
+ host_system_name: "local"
+ needsPic: true
+ supports_gold_linker: false
+ supports_incremental_linker: false
+ supports_fission: false
+ supports_interface_shared_objects: false
+ supports_normalizing_ar: false
+ supports_start_end_lib: false
+ supports_thin_archives: false
+ target_libc: "local"
+ target_cpu: "local"
+ target_system_name: "local"
+ toolchain_identifier: "local_linux"
+
+ tool_path { name: "ar" path: "/usr/bin/ar" }
+ tool_path { name: "compat-ld" path: "/usr/bin/ld" }
+ tool_path { name: "cpp" path: "/usr/bin/cpp" }
+ tool_path { name: "dwp" path: "/usr/bin/dwp" }
+ # As part of the TensorFlow release, we place some ROCm-related compilation
+ # files in @local_config_rocm//crosstool/clang/bin, and this relative
+ # path, combined with the rest of our Bazel configuration causes our
+ # compilation to use those files.
+ tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" }
+ # Use "-std=c++11" for hipcc. For consistency, force both the host compiler
+ # and the device compiler to use "-std=c++11".
+ cxx_flag: "-std=c++11"
+ linker_flag: "-Wl,-no-as-needed"
+ linker_flag: "-lstdc++"
+ #linker_flag: "-B/usr/bin/"
+ linker_flag: "-B/opt/rocm/hcc/compiler/bin"
+
+%{host_compiler_includes}
+ tool_path { name: "gcov" path: "/usr/bin/gcov" }
+
+ # C(++) compiles invoke the compiler (as that is the one knowing where
+ # to find libraries), but we provide LD so other rules can invoke the linker.
+ tool_path { name: "ld" path: "/usr/bin/ld" }
+
+ tool_path { name: "nm" path: "/usr/bin/nm" }
+ tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
+ objcopy_embed_flag: "-I"
+ objcopy_embed_flag: "binary"
+ tool_path { name: "objdump" path: "/usr/bin/objdump" }
+ tool_path { name: "strip" path: "/usr/bin/strip" }
+
+ # Anticipated future default.
+ unfiltered_cxx_flag: "-no-canonical-prefixes"
+
+ # Make C++ compilation deterministic. Use linkstamping instead of these
+ # compiler symbols.
+ unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+ unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+ unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+ unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+ unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__"
+ # The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers
+ # It needs to be always set when compiling Eigen headers
+ # (irrespective of whether the source file is being compiled via HIPCC)
+ # so adding -DEIGEN_USE_HIP as a default CXX flag here
+ unfiltered_cxx_flag: "-DEIGEN_USE_HIP"
+
+
+ # Security hardening on by default.
+ # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+ # We need to undef it before redefining it as some distributions now have
+ # it enabled by default.
+ #compiler_flag: "-U_FORTIFY_SOURCE"
+ #compiler_flag: "-D_FORTIFY_SOURCE=1"
+ #compiler_flag: "-fstack-protector"
+ #compiler_flag: "-fPIE"
+ #linker_flag: "-pie"
+ #linker_flag: "-Wl,-z,relro,-z,now"
+
+ # Enable coloring even if there's no attached terminal. Bazel removes the
+ # escape sequences if --nocolor is specified. This isn't supported by gcc
+ # on Ubuntu 14.04.
+ # compiler_flag: "-fcolor-diagnostics"
+
+ # All warnings are enabled. Maybe enable -Werror as well?
+ compiler_flag: "-Wall"
+ # Enable a few more warnings that aren't part of -Wall.
+ compiler_flag: "-Wunused-but-set-parameter"
+ # But disable some that are problematic.
+ compiler_flag: "-Wno-free-nonheap-object" # has false positives
+
+ # Keep stack frames for debugging, even in opt mode.
+ compiler_flag: "-fno-omit-frame-pointer"
+
+ # Anticipated future default.
+ linker_flag: "-no-canonical-prefixes"
+ unfiltered_cxx_flag: "-fno-canonical-system-headers"
+ # Have gcc return the exit code from ld.
+ linker_flag: "-pass-exit-codes"
+ # Stamp the binary with a unique identifier.
+ linker_flag: "-Wl,--build-id=md5"
+ linker_flag: "-Wl,--hash-style=gnu"
+ # Gold linker only? Can we enable this by default?
+ # linker_flag: "-Wl,--warn-execstack"
+ # linker_flag: "-Wl,--detect-odr-violations"
+
+ # Include directory for ROCm headers.
+%{rocm_include_path}
+
+ compilation_mode_flags {
+ mode: DBG
+ # Enable debug symbols.
+ compiler_flag: "-g"
+ }
+ compilation_mode_flags {
+ mode: OPT
+
+ # No debug symbols.
+ # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
+ # even generally? However, that can't happen here, as it requires special
+ # handling in Bazel.
+ compiler_flag: "-g0"
+
+ # Conservative choice for -O
+ # -O3 can increase binary size and even slow down the resulting binaries.
+ # Profile first and / or use FDO if you need better performance than this.
+ compiler_flag: "-O2"
+
+ # Disable assertions
+ compiler_flag: "-DNDEBUG"
+
+ # Removal of unused code and data at link time (can this increase binary size in some cases?).
+ compiler_flag: "-ffunction-sections"
+ compiler_flag: "-fdata-sections"
+ linker_flag: "-Wl,--gc-sections"
+ }
+ linking_mode_flags { mode: DYNAMIC }
+}
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
new file mode 100755
index 0000000000..824238022b
--- /dev/null
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+"""Crosstool wrapper for compiling ROCm programs.
+
+SYNOPSIS:
+ crosstool_wrapper_driver_rocm [options passed in by cc_library()
+ or cc_binary() rule]
+
+DESCRIPTION:
+ This script is expected to be called by the cc_library() or cc_binary() bazel
+ rules. When the option "-x rocm" is present in the list of arguments passed
+ to this script, it invokes the hipcc compiler. Most arguments are passed
+ as is as a string to --compiler-options of hipcc. When "-x rocm" is not
+ present, this wrapper invokes gcc with the input arguments as is.
+"""
+
+from __future__ import print_function
+
+__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)'
+
+from argparse import ArgumentParser
+import os
+import subprocess
+import re
+import sys
+import pipes
+
+# Template values set by rocm_configure.bzl.
+CPU_COMPILER = ('%{cpu_compiler}')
+GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
+
+HIPCC_PATH = '%{hipcc_path}'
+PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
+
+def Log(s):
+ print('gpus/crosstool: {0}'.format(s))
+
+
+def GetOptionValue(argv, option):
+ """Extract the list of values for option from the argv list.
+
+ Args:
+ argv: A list of strings, possibly the argv passed to main().
+ option: The option whose value to extract, without the leading '-'.
+
+ Returns:
+ A list of values, either directly following the option,
+ (eg., -opt val1 val2) or values collected from multiple occurrences of
+ the option (eg., -opt val1 -opt val2).
+ """
+
+ parser = ArgumentParser()
+ parser.add_argument('-' + option, nargs='*', action='append')
+ args, _ = parser.parse_known_args(argv)
+ if not args or not vars(args)[option]:
+ return []
+ else:
+ return sum(vars(args)[option], [])
+
+
+def GetHostCompilerOptions(argv):
+ """Collect the -isystem, -iquote, and --sysroot option values from argv.
+
+ Args:
+ argv: A list of strings, possibly the argv passed to main().
+
+ Returns:
+ The string that can be used as the --compiler-options to hipcc.
+ """
+
+ parser = ArgumentParser()
+ parser.add_argument('-isystem', nargs='*', action='append')
+ parser.add_argument('-iquote', nargs='*', action='append')
+ parser.add_argument('--sysroot', nargs=1)
+ parser.add_argument('-g', nargs='*', action='append')
+ parser.add_argument('-fno-canonical-system-headers', action='store_true')
+
+ args, _ = parser.parse_known_args(argv)
+
+ opts = ''
+
+ if args.isystem:
+ opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, []))
+ if args.iquote:
+ opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
+ if args.g:
+ opts += ' -g' + ' -g'.join(sum(args.g, []))
+ #if args.fno_canonical_system_headers:
+ # opts += ' -fno-canonical-system-headers'
+ if args.sysroot:
+ opts += ' --sysroot ' + args.sysroot[0]
+
+ return opts
+
+def GetHipccOptions(argv):
+ """Collect the -hipcc_options values from argv.
+
+ Args:
+ argv: A list of strings, possibly the argv passed to main().
+
+ Returns:
+ The string that can be passed directly to hipcc.
+ """
+
+ parser = ArgumentParser()
+ parser.add_argument('-hipcc_options', nargs='*', action='append')
+
+ args, _ = parser.parse_known_args(argv)
+
+ if args.hipcc_options:
+ options = _update_options(sum(args.hipcc_options, []))
+ return ' '.join(['--'+a for a in options])
+ return ''
+
+
+def InvokeHipcc(argv, log=False):
+ """Call hipcc with arguments assembled from argv.
+
+ Args:
+ argv: A list of strings, possibly the argv passed to main().
+ log: True if logging is requested.
+
+ Returns:
+ The return value of calling os.system('hipcc ' + args)
+ """
+
+ host_compiler_options = GetHostCompilerOptions(argv)
+ hipcc_compiler_options = GetHipccOptions(argv)
+ opt_option = GetOptionValue(argv, 'O')
+ m_options = GetOptionValue(argv, 'm')
+ m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
+ include_options = GetOptionValue(argv, 'I')
+ out_file = GetOptionValue(argv, 'o')
+ depfiles = GetOptionValue(argv, 'MF')
+ defines = GetOptionValue(argv, 'D')
+ defines = ''.join([' -D' + define for define in defines])
+ undefines = GetOptionValue(argv, 'U')
+ undefines = ''.join([' -U' + define for define in undefines])
+ std_options = GetOptionValue(argv, 'std')
+ hipcc_allowed_std_options = ["c++11"]
+ std_options = ''.join([' -std=' + define
+ for define in std_options if define in hipcc_allowed_std_options])
+
+ # The list of source files get passed after the -c option. I don't know of
+ # any other reliable way to just get the list of source files to be compiled.
+ src_files = GetOptionValue(argv, 'c')
+
+ if len(src_files) == 0:
+ return 1
+ if len(out_file) != 1:
+ return 1
+
+ opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
+ else ' -g')
+
+ includes = (' -I ' + ' -I '.join(include_options)
+ if len(include_options) > 0
+ else '')
+
+ # Unfortunately, there are other options that have -c prefix too.
+ # So allowing only those look like C/C++ files.
+ src_files = [f for f in src_files if
+ re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
+ srcs = ' '.join(src_files)
+ out = ' -o ' + out_file[0]
+
+ hipccopts = ' '
+ hipccopts += ' ' + hipcc_compiler_options
+ hipccopts += undefines
+ hipccopts += defines
+ hipccopts += std_options
+ hipccopts += m_options
+
+ if depfiles:
+ # Generate the dependency file
+ depfile = depfiles[0]
+ cmd = (HIPCC_PATH + ' ' + hipccopts +
+ host_compiler_options +
+ ' ' + GCC_HOST_COMPILER_PATH +
+ ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
+ if log: Log(cmd)
+ exit_status = os.system(cmd)
+ if exit_status != 0:
+ return exit_status
+
+ cmd = (HIPCC_PATH + ' ' + hipccopts +
+ host_compiler_options + ' -fPIC' +
+ ' ' + GCC_HOST_COMPILER_PATH +
+ ' -I .' + opt + includes + ' -c ' + srcs + out)
+
+ # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
+ # Need to investigate and fix.
+ cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
+ if log: Log(cmd)
+ return os.system(cmd)
+
+
+def main():
+ # ignore PWD env var
+ os.environ['PWD']=''
+
+ parser = ArgumentParser()
+ parser.add_argument('-x', nargs=1)
+ parser.add_argument('--rocm_log', action='store_true')
+ parser.add_argument('-pass-exit-codes', action='store_true')
+ args, leftover = parser.parse_known_args(sys.argv[1:])
+
+ if args.x and args.x[0] == 'rocm':
+ if args.rocm_log: Log('-x rocm')
+ leftover = [pipes.quote(s) for s in leftover]
+ if args.rocm_log: Log('using hipcc')
+ return InvokeHipcc(leftover, log=args.rocm_log)
+
+ # XXX use hipcc to link
+ if args.pass_exit_codes:
+ gpu_compiler_flags = [flag for flag in sys.argv[1:]
+ if not flag.startswith(('-pass-exit-codes'))]
+
+ # special handling for $ORIGIN
+ # - guard every argument with ''
+ modified_gpu_compiler_flags = []
+ for flag in gpu_compiler_flags:
+ modified_gpu_compiler_flags.append("'" + flag + "'")
+
+ if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags)))
+ return subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags)
+
+ # Strip our flags before passing through to the CPU compiler for files which
+ # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
+ # We not only want to pass -x to the CPU compiler, but also keep it in its
+ # relative location in the argv list (the compiler is actually sensitive to
+ # this).
+ cpu_compiler_flags = [flag for flag in sys.argv[1:]
+ if not flag.startswith(('--rocm_log'))]
+
+ # XXX: SE codes need to be built with gcc, but need this macro defined
+ cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
+
+ return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/third_party/gpus/rocm/BUILD b/third_party/gpus/rocm/BUILD
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/gpus/rocm/BUILD
diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl
new file mode 100644
index 0000000000..8258bb3589
--- /dev/null
+++ b/third_party/gpus/rocm/BUILD.tpl
@@ -0,0 +1,99 @@
+licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
+
+package(default_visibility = ["//visibility:public"])
+
+config_setting(
+ name = "using_hipcc",
+ values = {
+ "define": "using_rocm_hipcc=true",
+ },
+)
+
+cc_library(
+ name = "rocm_headers",
+ hdrs = [
+ "rocm/rocm_config.h",
+ %{rocm_headers}
+ ],
+ includes = [
+ ".",
+ "rocm/include",
+ ],
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "hip",
+ srcs = ["rocm/lib/%{hip_lib}"],
+ data = ["rocm/lib/%{hip_lib}"],
+ includes = [
+ ".",
+ "rocm/include",
+ ],
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "rocblas",
+ srcs = ["rocm/lib/%{rocblas_lib}"],
+ data = ["rocm/lib/%{rocblas_lib}"],
+ includes = [
+ ".",
+ "rocm/include",
+ ],
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "rocfft",
+ srcs = ["rocm/lib/%{rocfft_lib}"],
+ data = ["rocm/lib/%{rocfft_lib}"],
+ includes = [
+ ".",
+ "rocm/include",
+ ],
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "hiprand",
+ srcs = ["rocm/lib/%{hiprand_lib}"],
+ data = ["rocm/lib/%{hiprand_lib}"],
+ includes = [
+ ".",
+ "rocm/include",
+ "rocm/include/rocrand",
+ ],
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "miopen",
+ srcs = ["rocm/lib/%{miopen_lib}"],
+ data = ["rocm/lib/%{miopen_lib}"],
+ includes = [
+ ".",
+ "rocm/include",
+ ],
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "rocm",
+ visibility = ["//visibility:public"],
+ deps = [
+ ":rocm_headers",
+ ":hip",
+ ":rocblas",
+ ":rocfft",
+ ":hiprand",
+ ":miopen",
+ ],
+)
+
+%{rocm_include_genrules}
diff --git a/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/gpus/rocm/build_defs.bzl.tpl
new file mode 100644
index 0000000000..08c59f95a0
--- /dev/null
+++ b/third_party/gpus/rocm/build_defs.bzl.tpl
@@ -0,0 +1,45 @@
+# Macros for building ROCm code.
+def if_rocm(if_true, if_false = []):
+ """Shorthand for select()'ing on whether we're building with ROCm.
+
+ Returns a select statement which evaluates to if_true if we're building
+ with ROCm enabled. Otherwise, the select statement evaluates to if_false.
+
+ """
+ return select({
+ "@local_config_rocm//rocm:using_hipcc": if_true,
+ "//conditions:default": if_false
+ })
+
+
+def rocm_default_copts():
+ """Default options for all ROCm compilations."""
+ return if_rocm(["-x", "rocm"] + %{rocm_extra_copts})
+
+def rocm_copts(opts = []):
+ """Gets the appropriate set of copts for (maybe) ROCm compilation.
+
+ If we're doing ROCm compilation, returns copts for our particular ROCm
+ compiler. If we're not doing ROCm compilation, returns an empty list.
+
+ """
+ return rocm_default_copts() + select({
+ "//conditions:default": [],
+ "@local_config_rocm//rocm:using_hipcc": ([
+ "",
+ ]),
+ }) + if_rocm_is_configured(opts)
+
+def rocm_is_configured():
+ """Returns true if ROCm was enabled during the configure process."""
+ return %{rocm_is_configured}
+
+def if_rocm_is_configured(x):
+ """Tests if the ROCm was enabled during the configure process.
+
+ Unlike if_rocm(), this does not require that we are building with
+ --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries.
+ """
+ if rocm_is_configured():
+ return x
+ return []
diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl
new file mode 100644
index 0000000000..c5f25a845c
--- /dev/null
+++ b/third_party/gpus/rocm/rocm_config.h.tpl
@@ -0,0 +1,21 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef ROCM_ROCM_CONFIG_H_
+#define ROCM_ROCM_CONFIG_H_
+
+#define TF_ROCM_TOOLKIT_PATH "/opt/rocm"
+
+#endif // ROCM_ROCM_CONFIG_H_
diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
new file mode 100644
index 0000000000..9108639b0b
--- /dev/null
+++ b/third_party/gpus/rocm_configure.bzl
@@ -0,0 +1,784 @@
+# -*- Python -*-
+"""Repository rule for ROCm autoconfiguration.
+
+`rocm_configure` depends on the following environment variables:
+
+ * `TF_NEED_ROCM`: Whether to enable building with ROCm.
+ * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
+ * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is
+ `/opt/rocm`.
+ * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then
+ use the system default.
+ * `TF_MIOPEN_VERSION`: The version of the MIOpen library.
+ * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is
+ `gfx803,gfx900`.
+"""
+
+_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH"
+_TF_ROCM_VERSION = "TF_ROCM_VERSION"
+_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION"
+_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
+_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
+
+_DEFAULT_ROCM_VERSION = ""
+_DEFAULT_MIOPEN_VERSION = ""
+_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm"
+_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"]
+
+def find_cc(repository_ctx):
+ """Find the C++ compiler."""
+
+ # Return a dummy value for GCC detection here to avoid error
+ target_cc_name = "gcc"
+ cc_path_envvar = _GCC_HOST_COMPILER_PATH
+ cc_name = target_cc_name
+
+ if cc_path_envvar in repository_ctx.os.environ:
+ cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+ if cc_name_from_env:
+ cc_name = cc_name_from_env
+ if cc_name.startswith("/"):
+ # Absolute path, maybe we should make this supported by our which function.
+ return cc_name
+ cc = repository_ctx.which(cc_name)
+ if cc == None:
+ fail(("Cannot find {}, either correct your path or set the {}" +
+ " environment variable").format(target_cc_name, cc_path_envvar))
+ return cc
+
+_INC_DIR_MARKER_BEGIN = "#include <...>"
+
+def _cxx_inc_convert(path):
+ """Convert path returned by cc -E xc++ in a complete path."""
+ path = path.strip()
+ return path
+
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+ """Compute the list of default C or C++ include directories."""
+ if lang_is_cpp:
+ lang = "c++"
+ else:
+ lang = "c"
+
+ # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
+ # but in rocm_clang CROSSTOOL file that is a `feature` and we should
+ # handle the case when it's disabled and no flag is passed
+ result = repository_ctx.execute([
+ cc,
+ "-no-canonical-prefixes",
+ "-E",
+ "-x" + lang,
+ "-",
+ "-v",
+ ])
+ index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+ if index1 == -1:
+ return []
+ index1 = result.stderr.find("\n", index1)
+ if index1 == -1:
+ return []
+ index2 = result.stderr.rfind("\n ")
+ if index2 == -1 or index2 < index1:
+ return []
+ index2 = result.stderr.find("\n", index2 + 1)
+ if index2 == -1:
+ inc_dirs = result.stderr[index1 + 1:]
+ else:
+ inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+ return [
+ str(repository_ctx.path(_cxx_inc_convert(p)))
+ for p in inc_dirs.split("\n")
+ ]
+
+def get_cxx_inc_directories(repository_ctx, cc):
+ """Compute the list of default C and C++ include directories."""
+
+ # For some reason `clang -xc` sometimes returns include paths that are
+ # different from the ones from `clang -xc++`. (Symlink and a dir)
+ # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+ includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+ includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+ includes_cpp_set = depset(includes_cpp)
+ return includes_cpp + [
+ inc
+ for inc in includes_c
+ if inc not in includes_cpp_set
+ ]
+
+def auto_configure_fail(msg):
+ """Output failure message when rocm configuration fails."""
+ red = "\033[0;31m"
+ no_color = "\033[0m"
+ fail("\n%sROCm Configuration Error:%s %s\n" % (red, no_color, msg))
+
+# END cc_configure common functions (see TODO above).
+
+def _host_compiler_includes(repository_ctx, cc):
+ """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+
+ Args:
+ repository_ctx: The repository context.
+ cc: The path to the gcc host compiler.
+
+ Returns:
+ A string containing the cxx_builtin_include_directory for each of the gcc
+ host compiler include directories, which can be added to the CROSSTOOL
+ file.
+ """
+ inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+
+ # Add numpy headers
+ inc_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include")
+
+ entries = []
+ for inc_dir in inc_dirs:
+ entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir)
+
+ # define TENSORFLOW_USE_ROCM
+ entries.append(" unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"")
+
+ return "\n".join(entries)
+
+def _rocm_include_path(repository_ctx, rocm_config):
+ """Generates the cxx_builtin_include_directory entries for rocm inc dirs.
+
+ Args:
+ repository_ctx: The repository context.
+ cc: The path to the gcc host compiler.
+
+ Returns:
+ A string containing the cxx_builtin_include_directory for each of the gcc
+ host compiler include directories, which can be added to the CROSSTOOL
+ file.
+ """
+ inc_dirs = []
+
+ # general ROCm include path
+ inc_dirs.append(rocm_config.rocm_toolkit_path + "/include")
+
+ # Add HSA headers
+ inc_dirs.append("/opt/rocm/hsa/include")
+
+ # Add HIP headers
+ inc_dirs.append("/opt/rocm/include/hip")
+ inc_dirs.append("/opt/rocm/include/hip/hcc_detail")
+
+ # Add rocrand and hiprand headers
+ inc_dirs.append("/opt/rocm/rocrand/include")
+ inc_dirs.append("/opt/rocm/hiprand/include")
+
+ # Add rocfft headers
+ inc_dirs.append("/opt/rocm/rocfft/include")
+
+ # Add rocBLAS headers
+ inc_dirs.append("/opt/rocm/rocblas/include")
+
+ # Add MIOpen headers
+ inc_dirs.append("/opt/rocm/miopen/include")
+
+ # Add hcc headers
+ inc_dirs.append("/opt/rocm/hcc/include")
+ inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/")
+ inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include")
+
+ # Newer hcc builds use/are based off of clang 8.0.0.
+ inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/")
+ inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include")
+
+ inc_entries = []
+ for inc_dir in inc_dirs:
+ inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir)
+ return "\n".join(inc_entries)
+
+def _enable_rocm(repository_ctx):
+ if "TF_NEED_ROCM" in repository_ctx.os.environ:
+ enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip()
+ return enable_rocm == "1"
+ return False
+
+def _rocm_toolkit_path(repository_ctx):
+ """Finds the rocm toolkit directory.
+
+ Args:
+ repository_ctx: The repository context.
+
+ Returns:
+ A speculative real path of the rocm toolkit install directory.
+ """
+ rocm_toolkit_path = _DEFAULT_ROCM_TOOLKIT_PATH
+ if _ROCM_TOOLKIT_PATH in repository_ctx.os.environ:
+ rocm_toolkit_path = repository_ctx.os.environ[_ROCM_TOOLKIT_PATH].strip()
+ if not repository_ctx.path(rocm_toolkit_path).exists:
+ auto_configure_fail("Cannot find rocm toolkit path.")
+ return str(repository_ctx.path(rocm_toolkit_path).realpath)
+
+def _amdgpu_targets(repository_ctx):
+ """Returns a list of strings representing AMDGPU targets."""
+ if _TF_ROCM_AMDGPU_TARGETS not in repository_ctx.os.environ:
+ return _DEFAULT_ROCM_AMDGPU_TARGETS
+ amdgpu_targets_str = repository_ctx.os.environ[_TF_ROCM_AMDGPU_TARGETS]
+ amdgpu_targets = amdgpu_targets_str.split(",")
+ for amdgpu_target in amdgpu_targets:
+ if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit():
+ auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target)
+ return amdgpu_targets
+
+def _cpu_value(repository_ctx):
+ """Returns the name of the host operating system.
+
+ Args:
+ repository_ctx: The repository context.
+
+ Returns:
+ A string containing the name of the host operating system.
+ """
+ os_name = repository_ctx.os.name.lower()
+ if os_name.startswith("mac os"):
+ return "Darwin"
+ if os_name.find("windows") != -1:
+ return "Windows"
+ result = repository_ctx.execute(["uname", "-s"])
+ return result.stdout.strip()
+
+def _lib_name(lib, cpu_value, version = "", static = False):
+ """Constructs the platform-specific name of a library.
+
+ Args:
+ lib: The name of the library, such as "hip"
+ cpu_value: The name of the host operating system.
+ version: The version of the library.
+ static: True the library is static or False if it is a shared object.
+
+ Returns:
+ The platform-specific name of the library.
+ """
+ if cpu_value in ("Linux"):
+ if static:
+ return "lib%s.a" % lib
+ else:
+ if version:
+ version = ".%s" % version
+ return "lib%s.so%s" % (lib, version)
+ elif cpu_value == "Windows":
+ return "%s.lib" % lib
+ elif cpu_value == "Darwin":
+ if static:
+ return "lib%s.a" % lib
+ elif version:
+ version = ".%s" % version
+ return "lib%s%s.dylib" % (lib, version)
+ else:
+ auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
+def _find_rocm_lib(
+ lib,
+ repository_ctx,
+ cpu_value,
+ basedir,
+ version = "",
+ static = False):
+ """Finds the given ROCm libraries on the system.
+
+ Args:
+ lib: The name of the library, such as "hip"
+ repository_ctx: The repository context.
+ cpu_value: The name of the host operating system.
+ basedir: The install directory of ROCm.
+ version: The version of the library.
+ static: True if static library, False if shared object.
+
+ Returns:
+ Returns a struct with the following fields:
+ file_name: The basename of the library found on the system.
+ path: The full path to the library.
+ """
+ file_name = _lib_name(lib, cpu_value, version, static)
+ if cpu_value == "Linux":
+ path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name))
+ if path.exists:
+ return struct(file_name = file_name, path = str(path.realpath))
+ path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name))
+ if path.exists:
+ return struct(file_name = file_name, path = str(path.realpath))
+ path = repository_ctx.path(
+ "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name),
+ )
+ if path.exists:
+ return struct(file_name = file_name, path = str(path.realpath))
+
+ path = repository_ctx.path("%s/lib/%s" % (basedir, file_name))
+ if path.exists:
+ return struct(file_name = file_name, path = str(path.realpath))
+ path = repository_ctx.path("%s/%s" % (basedir, file_name))
+ if path.exists:
+ return struct(file_name = file_name, path = str(path.realpath))
+
+ auto_configure_fail("Cannot find rocm library %s" % file_name)
+
+def _find_libs(repository_ctx, rocm_config):
+ """Returns the ROCm libraries on the system.
+
+ Args:
+ repository_ctx: The repository context.
+ rocm_config: The ROCm config as returned by _get_rocm_config
+
+ Returns:
+ Map of library names to structs of filename and path as returned by
+ _find_rocm_lib.
+ """
+ cpu_value = rocm_config.cpu_value
+ return {
+ "hip": _find_rocm_lib(
+ "hip_hcc",
+ repository_ctx,
+ cpu_value,
+ rocm_config.rocm_toolkit_path,
+ ),
+ "rocblas": _find_rocm_lib(
+ "rocblas",
+ repository_ctx,
+ cpu_value,
+ rocm_config.rocm_toolkit_path + "/rocblas",
+ ),
+ "rocfft": _find_rocm_lib(
+ "rocfft",
+ repository_ctx,
+ cpu_value,
+ rocm_config.rocm_toolkit_path + "/rocfft",
+ ),
+ "hiprand": _find_rocm_lib(
+ "hiprand",
+ repository_ctx,
+ cpu_value,
+ rocm_config.rocm_toolkit_path + "/hiprand",
+ ),
+ "miopen": _find_rocm_lib(
+ "MIOpen",
+ repository_ctx,
+ cpu_value,
+ rocm_config.rocm_toolkit_path + "/miopen",
+ ),
+ }
+
+def _get_rocm_config(repository_ctx):
+ """Detects and returns information about the ROCm installation on the system.
+
+ Args:
+ repository_ctx: The repository context.
+
+ Returns:
+ A struct containing the following fields:
+ rocm_toolkit_path: The ROCm toolkit installation directory.
+ amdgpu_targets: A list of the system's AMDGPU targets.
+ cpu_value: The name of the host operating system.
+ """
+ cpu_value = _cpu_value(repository_ctx)
+ rocm_toolkit_path = _rocm_toolkit_path(repository_ctx)
+ return struct(
+ rocm_toolkit_path = rocm_toolkit_path,
+ amdgpu_targets = _amdgpu_targets(repository_ctx),
+ cpu_value = cpu_value,
+ )
+
+def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
+ if not out:
+ out = tpl.replace(":", "/")
+ repository_ctx.template(
+ out,
+ Label("//third_party/gpus/%s.tpl" % tpl),
+ substitutions,
+ )
+
+def _file(repository_ctx, label):
+ repository_ctx.template(
+ label.replace(":", "/"),
+ Label("//third_party/gpus/%s.tpl" % label),
+ {},
+ )
+
+_DUMMY_CROSSTOOL_BZL_FILE = """
+def error_gpu_disabled():
+ fail("ERROR: Building with --config=rocm but TensorFlow is not configured " +
+ "to build with GPU support. Please re-run ./configure and enter 'Y' " +
+ "at the prompt to build with GPU support.")
+
+ native.genrule(
+ name = "error_gen_crosstool",
+ outs = ["CROSSTOOL"],
+ cmd = "echo 'Should not be run.' && exit 1",
+ )
+
+ native.filegroup(
+ name = "crosstool",
+ srcs = [":CROSSTOOL"],
+ output_licenses = ["unencumbered"],
+ )
+"""
+
+_DUMMY_CROSSTOOL_BUILD_FILE = """
+load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
+
+error_gpu_disabled()
+"""
+
+def _create_dummy_repository(repository_ctx):
+ cpu_value = _cpu_value(repository_ctx)
+
+ # Set up BUILD file for rocm/.
+ _tpl(
+ repository_ctx,
+ "rocm:build_defs.bzl",
+ {
+ "%{rocm_is_configured}": "False",
+ "%{rocm_extra_copts}": "[]",
+ },
+ )
+ _tpl(
+ repository_ctx,
+ "rocm:BUILD",
+ {
+ "%{hip_lib}": _lib_name("hip", cpu_value),
+ "%{rocblas_lib}": _lib_name("rocblas", cpu_value),
+ "%{miopen_lib}": _lib_name("miopen", cpu_value),
+ "%{rocfft_lib}": _lib_name("rocfft", cpu_value),
+ "%{hiprand_lib}": _lib_name("hiprand", cpu_value),
+ "%{rocm_include_genrules}": "",
+ "%{rocm_headers}": "",
+ },
+ )
+
+ # Create dummy files for the ROCm toolkit since they are still required by
+ # tensorflow/core/platform/default/build_config:rocm.
+ repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "")
+
+ # Set up rocm_config.h, which is used by
+ # tensorflow/stream_executor/dso_loader.cc.
+ _tpl(
+ repository_ctx,
+ "rocm:rocm_config.h",
+ {
+ "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH,
+ },
+ "rocm/rocm/rocm_config.h",
+ )
+
+ # If rocm_configure is not configured to build with GPU support, and the user
+ # attempts to build with --config=rocm, add a dummy build rule to intercept
+ # this and fail with an actionable error message.
+ repository_ctx.file(
+ "crosstool/error_gpu_disabled.bzl",
+ _DUMMY_CROSSTOOL_BZL_FILE,
+ )
+ repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
+
+def _execute(
+ repository_ctx,
+ cmdline,
+ error_msg = None,
+ error_details = None,
+ empty_stdout_fine = False):
+ """Executes an arbitrary shell command.
+
+ Args:
+ repository_ctx: the repository_ctx object
+ cmdline: list of strings, the command to execute
+ error_msg: string, a summary of the error if the command fails
+ error_details: string, details about the error or steps to fix it
+ empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
+ it's an error
+ Return:
+ the result of repository_ctx.execute(cmdline)
+ """
+ result = repository_ctx.execute(cmdline)
+ if result.stderr or not (empty_stdout_fine or result.stdout):
+ auto_configure_fail(
+ "\n".join([
+ error_msg.strip() if error_msg else "Repository command failed",
+ result.stderr.strip(),
+ error_details if error_details else "",
+ ]),
+ )
+ return result
+
+def _norm_path(path):
+ """Returns a path with '/' and remove the trailing slash."""
+ path = path.replace("\\", "/")
+ if path[-1] == "/":
+ path = path[:-1]
+ return path
+
+def _symlink_genrule_for_dir(
+ repository_ctx,
+ src_dir,
+ dest_dir,
+ genrule_name,
+ src_files = [],
+ dest_files = []):
+ """Returns a genrule to symlink(or copy if on Windows) a set of files.
+
+ If src_dir is passed, files will be read from the given directory; otherwise
+ we assume files are in src_files and dest_files
+ """
+ if src_dir != None:
+ src_dir = _norm_path(src_dir)
+ dest_dir = _norm_path(dest_dir)
+ files = _read_dir(repository_ctx, src_dir)
+
+ # Create a list with the src_dir stripped to use for outputs.
+ dest_files = files.replace(src_dir, "").splitlines()
+ src_files = files.splitlines()
+ command = []
+
+ # We clear folders that might have been generated previously to avoid
+ # undesired inclusions
+ command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
+ command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
+ outs = []
+ for i in range(len(dest_files)):
+ if dest_files[i] != "":
+ # If we have only one file to link we do not want to use the dest_dir, as
+ # $(@D) will include the full path to the file.
+ dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
+
+ # On Windows, symlink is not supported, so we just copy all the files.
+ cmd = "ln -s"
+ command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
+ outs.append(' "' + dest_dir + dest_files[i] + '",')
+ genrule = _genrule(
+ src_dir,
+ genrule_name,
+ " && ".join(command),
+ "\n".join(outs),
+ )
+ return genrule
+
+def _genrule(src_dir, genrule_name, command, outs):
+ """Returns a string with a genrule.
+
+ Genrule executes the given command and produces the given outputs.
+ """
+ return (
+ "genrule(\n" +
+ ' name = "' +
+ genrule_name + '",\n' +
+ " outs = [\n" +
+ outs +
+ "\n ],\n" +
+ ' cmd = """\n' +
+ command +
+ '\n """,\n' +
+ ")\n"
+ )
+
+def _read_dir(repository_ctx, src_dir):
+ """Returns a string with all files in a directory.
+
+ Finds all files inside a directory, traversing subfolders and following
+ symlinks. The returned string contains the full path of all files
+ separated by line breaks.
+ """
+ find_result = _execute(
+ repository_ctx,
+ ["find", src_dir, "-follow", "-type", "f"],
+ empty_stdout_fine = True,
+ )
+ result = find_result.stdout
+ return result
+
+def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
+ if False:
+ amdgpu_target_flags = ["--amdgpu-target=" +
+ amdgpu_target for amdgpu_target in amdgpu_targets]
+ else:
+ # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc"
+ amdgpu_target_flags = []
+ return str(amdgpu_target_flags)
+
+def _create_local_rocm_repository(repository_ctx):
+ """Creates the repository containing files set up to build with ROCm."""
+ rocm_config = _get_rocm_config(repository_ctx)
+
+ # Set up symbolic links for the rocm toolkit by creating genrules to do
+ # symlinking. We create one genrule for each directory we want to track under
+ # rocm_toolkit_path
+ rocm_toolkit_path = rocm_config.rocm_toolkit_path
+ rocm_include_path = rocm_toolkit_path + "/include"
+ genrules = [_symlink_genrule_for_dir(
+ repository_ctx,
+ rocm_include_path,
+ "rocm/include",
+ "rocm-include",
+ )]
+ genrules.append(_symlink_genrule_for_dir(
+ repository_ctx,
+ rocm_toolkit_path + "/rocfft/include",
+ "rocm/include/rocfft",
+ "rocfft-include",
+ ))
+ genrules.append(_symlink_genrule_for_dir(
+ repository_ctx,
+ rocm_toolkit_path + "/rocblas/include",
+ "rocm/include/rocblas",
+ "rocblas-include",
+ ))
+ genrules.append(_symlink_genrule_for_dir(
+ repository_ctx,
+ rocm_toolkit_path + "/miopen/include",
+ "rocm/include/miopen",
+ "miopen-include",
+ ))
+
+ rocm_libs = _find_libs(repository_ctx, rocm_config)
+ rocm_lib_src = []
+ rocm_lib_dest = []
+ for lib in rocm_libs.values():
+ rocm_lib_src.append(lib.path)
+ rocm_lib_dest.append("rocm/lib/" + lib.file_name)
+ genrules.append(_symlink_genrule_for_dir(
+ repository_ctx,
+ None,
+ "",
+ "rocm-lib",
+ rocm_lib_src,
+ rocm_lib_dest,
+ ))
+
+ included_files = _read_dir(repository_ctx, rocm_include_path).replace(
+ rocm_include_path,
+ "",
+ ).splitlines()
+
+ # Set up BUILD file for rocm/
+ _tpl(
+ repository_ctx,
+ "rocm:build_defs.bzl",
+ {
+ "%{rocm_is_configured}": "True",
+ "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+ repository_ctx,
+ rocm_config.amdgpu_targets,
+ ),
+ },
+ )
+ _tpl(
+ repository_ctx,
+ "rocm:BUILD",
+ {
+ "%{hip_lib}": rocm_libs["hip"].file_name,
+ "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
+ "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
+ "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
+ "%{miopen_lib}": rocm_libs["miopen"].file_name,
+ "%{rocm_include_genrules}": "\n".join(genrules),
+ "%{rocm_headers}": ('":rocm-include",\n' +
+ '":rocfft-include",\n' +
+ '":rocblas-include",\n' +
+ '":miopen-include",'),
+ },
+ )
+
+ # Set up crosstool/
+ _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
+ cc = find_cc(repository_ctx)
+ host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
+ rocm_defines = {
+ "%{rocm_include_path}": _rocm_include_path(
+ repository_ctx,
+ rocm_config,
+ ),
+ "%{host_compiler_includes}": host_compiler_includes,
+ "%{clang_path}": str(cc),
+ }
+
+ _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out = "crosstool/CROSSTOOL")
+
+ _tpl(
+ repository_ctx,
+ "crosstool:clang/bin/crosstool_wrapper_driver_rocm",
+ {
+ "%{cpu_compiler}": str(cc),
+ "%{hipcc_path}": "/opt/rocm/bin/hipcc",
+ "%{gcc_host_compiler_path}": str(cc),
+ "%{rocm_amdgpu_targets}": ",".join(
+ ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
+ ),
+ },
+ )
+
+ # Set up rocm_config.h, which is used by
+ # tensorflow/stream_executor/dso_loader.cc.
+ _tpl(
+ repository_ctx,
+ "rocm:rocm_config.h",
+ {
+ "%{rocm_amdgpu_targets}": ",".join(
+ ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
+ ),
+ "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path,
+ },
+ "rocm/rocm/rocm_config.h",
+ )
+
+def _create_remote_rocm_repository(repository_ctx, remote_config_repo):
+ """Creates pointers to a remotely configured repo set up to build with ROCm."""
+ _tpl(
+ repository_ctx,
+ "rocm:build_defs.bzl",
+ {
+ "%{rocm_is_configured}": "True",
+ "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+ repository_ctx, #_compute_capabilities(repository_ctx)
+ ),
+ },
+ )
+ _tpl(
+ repository_ctx,
+ "rocm:remote.BUILD",
+ {
+ "%{remote_rocm_repo}": remote_config_repo,
+ },
+ "rocm/BUILD",
+ )
+ _tpl(repository_ctx, "crosstool:remote.BUILD", {
+ "%{remote_rocm_repo}": remote_config_repo,
+ }, "crosstool/BUILD")
+
+def _rocm_autoconf_impl(repository_ctx):
+ """Implementation of the rocm_autoconf repository rule."""
+ if not _enable_rocm(repository_ctx):
+ _create_dummy_repository(repository_ctx)
+ elif _TF_ROCM_CONFIG_REPO in repository_ctx.os.environ:
+ _create_remote_rocm_repository(
+ repository_ctx,
+ repository_ctx.os.environ[_TF_ROCM_CONFIG_REPO],
+ )
+ else:
+ _create_local_rocm_repository(repository_ctx)
+
+rocm_configure = repository_rule(
+ implementation = _rocm_autoconf_impl,
+ environ = [
+ _GCC_HOST_COMPILER_PATH,
+ "TF_NEED_ROCM",
+ _ROCM_TOOLKIT_PATH,
+ _TF_ROCM_VERSION,
+ _TF_MIOPEN_VERSION,
+ _TF_ROCM_AMDGPU_TARGETS,
+ _TF_ROCM_CONFIG_REPO,
+ ],
+)
+
+"""Detects and configures the local ROCm toolchain.
+
+Add the following to your WORKSPACE FILE:
+
+```python
+rocm_configure(name = "local_config_rocm")
+```
+
+Args:
+ name: A unique name for this workspace rule.
+"""
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 02e5b84306..3734fab715 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -43,6 +43,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld'
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
+build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
+build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
+
build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true