From 69d3b8faf41791834301a74a05e288964940427d Mon Sep 17 00:00:00 2001
From: "Wen-Heng (Jack) Chung" <whchung@gmail.com>
Date: Fri, 22 Jun 2018 23:09:43 -0500
Subject: [ROCm] bazel build system and continuous integration logic

The commit contains following components to support TensorFlow on ROCm platform

- bazel build system
- continuous integration logic

Authors:

- Jack Chung: jack.chung@amd.com
- Jeffrey Poznanovic: Jeffrey.Poznanovic@amd.com
- Peng Sun: Peng.Sun@amd.com
---
 configure.py                                       |  20 +
 tensorflow/core/BUILD                              |   4 +-
 tensorflow/core/kernels/BUILD                      |   3 +-
 tensorflow/tensorflow.bzl                          |  67 ++-
 tensorflow/tools/ci_build/Dockerfile.rocm          |  97 +++
 tensorflow/tools/ci_build/builds/docker_test.sh    |   9 +-
 tensorflow/tools/ci_build/builds/pip.sh            |   4 +-
 .../tools/ci_build/builds/with_the_same_user       |   6 +
 tensorflow/tools/ci_build/ci_build.sh              |  11 +-
 tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh |   1 +
 .../tools/ci_build/linux/cpu/run_py2_core.sh       |   1 +
 .../tools/ci_build/linux/cpu/run_py3_contrib.sh    |   1 +
 .../tools/ci_build/linux/cpu/run_py3_core.sh       |   1 +
 tensorflow/tools/ci_build/linux/libtensorflow.sh   |   3 +
 .../tools/ci_build/linux/libtensorflow_cpu.sh      |   1 +
 .../tools/ci_build/linux/libtensorflow_docker.sh   |   6 +
 .../tools/ci_build/linux/libtensorflow_rocm.sh     |  22 +
 .../tools/ci_build/linux/rocm/run_cc_core.sh       |  39 ++
 .../tools/ci_build/linux/rocm/run_py3_core.sh      |  39 ++
 .../tools/ci_build/osx/cpu/run_py2_cc_core.sh      |   1 +
 tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh |   1 +
 tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh |   1 +
 .../tools/ci_build/osx/libtensorflow_rocm.sh       |  36 ++
 .../tools/ci_build/xla/linux/rocm/run_py3.sh       |  41 ++
 tensorflow/workspace.bzl                           |   2 +
 third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl     | 158 +++++
 .../clang/bin/crosstool_wrapper_driver_rocm.tpl    | 241 ++++++++
 third_party/gpus/rocm/BUILD                        |   0
 third_party/gpus/rocm/BUILD.tpl                    |  99 +++
 third_party/gpus/rocm/build_defs.bzl.tpl           |  32 +
 third_party/gpus/rocm/rocm_config.h.tpl            |  21 +
 third_party/gpus/rocm_configure.bzl                | 663 +++++++++++++++++++++
 tools/bazel.rc                                     |   3 +
 33 files changed, 1611 insertions(+), 23 deletions(-)
 create mode 100644 tensorflow/tools/ci_build/Dockerfile.rocm
 create mode 100755 tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
 create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
 create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
 create mode 100755 tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
 create mode 100755 tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
 create mode 100644 third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
 create mode 100755 third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
 create mode 100644 third_party/gpus/rocm/BUILD
 create mode 100644 third_party/gpus/rocm/BUILD.tpl
 create mode 100644 third_party/gpus/rocm/build_defs.bzl.tpl
 create mode 100644 third_party/gpus/rocm/rocm_config.h.tpl
 create mode 100644 third_party/gpus/rocm_configure.bzl

diff --git a/configure.py b/configure.py
index 361bd4764d..4f998511aa 100644
--- a/configure.py
+++ b/configure.py
@@ -1521,6 +1521,13 @@ def main():
     else:
       set_trisycl_include_dir(environ_cp)
 
+  set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
+  if (environ_cp.get('TF_NEED_ROCM') == '1' and
+      'LD_LIBRARY_PATH' in environ_cp and environ_cp.get(
+      'LD_LIBRARY_PATH') != '1'):
+      write_action_env_to_bazelrc('LD_LIBRARY_PATH',
+                                  environ_cp.get('LD_LIBRARY_PATH'))
+
   set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
   if (environ_cp.get('TF_NEED_CUDA') == '1' and
       'TF_CUDA_CONFIG_REPO' not in environ_cp):
@@ -1561,6 +1568,19 @@ def main():
       write_to_bazelrc('build --config=download_clang')
       write_to_bazelrc('test --config=download_clang')
 
+  # SYCL / ROCm / CUDA are mutually exclusive.
+  # At most 1 GPU platform can be configured.
+  gpu_platform_count = 0
+  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
+    gpu_platform_count += 1
+  if environ_cp.get('TF_NEED_ROCM') == '1':
+    gpu_platform_count += 1
+  if environ_cp.get('TF_NEED_CUDA') == '1':
+    gpu_platform_count += 1
+  if gpu_platform_count >= 2:
+    raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
+                         'At most 1 GPU platform can be configured.')
+
   set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
   if environ_cp.get('TF_NEED_MPI') == '1':
     set_mpi_home(environ_cp)
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index c06fea130f..d5dfb8c813 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -146,7 +146,7 @@ load(
     "if_static",
     "tf_cuda_tests_tags",
 )
-load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured")
 load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
 load(
     "//third_party/mkl:build_defs.bzl",
@@ -2941,7 +2941,7 @@ tf_cuda_library(
         "platform/device_tracer.h",
     ],
     copts = tf_copts(),
-    cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
+    cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()),
     visibility = ["//visibility:private"],
     deps = [
         ":core_cpu_internal",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 25063ac823..68fa8fa481 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -55,7 +55,8 @@ load(
     "if_mkl_ml",
     "mkl_deps",
 )
-load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured")
+load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm", "if_rocm_is_configured")
 
 config_setting(
     # Add "--define tensorflow_xsmm=1" to your build command to use libxsmm for
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index adac895a17..f51a628ca3 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -17,8 +17,15 @@ load(
 )
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
-    "cuda_default_copts",
     "if_cuda",
+    "if_cuda_is_configured",
+    "cuda_default_copts",
+)
+load(
+    "@local_config_rocm//rocm:build_defs.bzl",
+    "if_rocm",
+    "if_rocm_is_configured",
+    "rocm_default_copts",
 )
 load(
     "//third_party/mkl:build_defs.bzl",
@@ -860,12 +867,14 @@ def tf_cuda_only_cc_test(
         srcs = srcs + tf_binary_additional_srcs(),
         size = size,
         args = args,
-        copts = _cuda_copts() + tf_copts(),
+        copts = _cuda_copts() + _rocm_copts() + tf_copts(),
         data = data + tf_binary_dynamic_kernel_dsos(kernels),
-        deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([
-            clean_dep("//tensorflow/core:cuda"),
-            clean_dep("//tensorflow/core:gpu_lib"),
-        ]),
+        deps = deps + tf_binary_dynamic_kernel_deps(kernels) +
+            if_cuda_is_configured([
+                clean_dep("//tensorflow/core:cuda"),
+                clean_dep("//tensorflow/core:gpu_lib")]) +
+            if_rocm_is_configured([
+                clean_dep("//tensorflow/core:gpu_lib")]),
         linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
         linkstatic = linkstatic or select({
             # cc_tests with ".so"s in srcs incorrectly link on Darwin
@@ -1000,7 +1009,7 @@ register_extension_info(
     label_regex_for_dep = "{extension_name}",
 )
 
-def _cuda_copts():
+def _cuda_copts(opts = []):
     """Gets the appropriate set of copts for (maybe) CUDA compilation.
 
       If we're doing CUDA compilation, returns copts for our particular CUDA
@@ -1016,13 +1025,31 @@ def _cuda_copts():
         "@local_config_cuda//cuda:using_clang": ([
             "-fcuda-flush-denormals-to-zero",
         ]),
-    })
+    }) + if_cuda_is_configured(opts)
+
+def _rocm_copts(opts = []):
+    """Gets the appropriate set of copts for (maybe) ROCm compilation.
+
+      If we're doing ROCm compilation, returns copts for our particular ROCm
+      compiler.  If we're not doing ROCm compilation, returns an empty list.
+
+      """
+    return rocm_default_copts() + select({
+        "//conditions:default": [],
+        "@local_config_rocm//rocm:using_hipcc": ([
+            "",
+        ])
+    }) + if_rocm_is_configured(opts)
 
 # Build defs for TensorFlow kernels
 
 # When this target is built using --config=cuda, a cc_library is built
 # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
 # libraries needed by GPU kernels.
+#
+# When this target is built using --config=rocm, a cc_library is built
+# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional
+# libraries needed by GPU kernels.
 def tf_gpu_kernel_library(
         srcs,
         copts = [],
@@ -1030,16 +1057,18 @@ def tf_gpu_kernel_library(
         deps = [],
         hdrs = [],
         **kwargs):
-    copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
+    copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + _rocm_copts(opts = cuda_copts)
     kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
 
     native.cc_library(
         srcs = srcs,
         hdrs = hdrs,
         copts = copts,
-        deps = deps + if_cuda([
+        deps = deps + if_cuda_is_configured([
             clean_dep("//tensorflow/core:cuda"),
             clean_dep("//tensorflow/core:gpu_lib"),
+        ]) + if_rocm_is_configured([
+            clean_dep("//tensorflow/core:gpu_lib"),
         ]),
         alwayslink = 1,
         **kwargs
@@ -1075,11 +1104,13 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)
 
     kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
     native.cc_library(
-        deps = deps + if_cuda(cuda_deps + [
+        deps = deps + if_cuda_is_configured(cuda_deps + [
             clean_dep("//tensorflow/core:cuda"),
-            "@local_config_cuda//cuda:cuda_headers",
+            "@local_config_cuda//cuda:cuda_headers"
+        ]) + if_rocm_is_configured(cuda_deps + [
+            "@local_config_rocm//rocm:rocm_headers"
         ]),
-        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
+        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
                  if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
                  if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
         **kwargs
@@ -1459,6 +1490,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
         "@local_config_cuda//cuda:cuda_headers",
         "@local_config_cuda//cuda:cudart_static",
     ]
+    rocm_deps = [
+        clean_dep("//tensorflow/core:stream_executor_headers_lib"),
+    ]
     deps = deps + tf_custom_op_library_additional_deps()
     if gpu_srcs:
         basename = name.split(".")[0]
@@ -1467,13 +1501,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
             srcs = gpu_srcs,
             copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
             features = if_cuda(["-use_header_modules"]),
-            deps = deps + if_cuda(cuda_deps),
+            deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps)
         )
         cuda_deps.extend([":" + basename + "_gpu"])
+        rocm_deps.extend([":" + basename + "_gpu"])
 
     check_deps(
         name = name + "_check_deps",
-        deps = deps + if_cuda(cuda_deps),
+        deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
         disallowed_deps = [
             clean_dep("//tensorflow/core:framework"),
             clean_dep("//tensorflow/core:lib"),
@@ -1482,7 +1517,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
     tf_cc_shared_object(
         name = name,
         srcs = srcs,
-        deps = deps + if_cuda(cuda_deps),
+        deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
         data = if_static([name + "_check_deps"]),
         copts = tf_copts(is_external = True),
         features = ["windows_export_all_symbols"],
diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
new file mode 100644
index 0000000000..aadaa8bac1
--- /dev/null
+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
@@ -0,0 +1,97 @@
+# This Dockerfile provides a starting point for a ROCm installation of 
+# MIOpen and tensorflow.  
+FROM ubuntu:xenial
+MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
+
+ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/
+ARG ROCM_PATH=/opt/rocm
+
+ENV DEBIAN_FRONTEND noninteractive
+ENV TF_NEED_ROCM 1
+ENV HOME /root/
+RUN apt update && apt install -y wget software-properties-common 
+
+# Add rocm repository
+RUN apt-get clean all
+RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
+RUN sh -c  "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
+
+# Install misc pkgs
+RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+  build-essential \
+  clang-3.8 \
+  clang-format-3.8 \
+  clang-tidy-3.8 \
+  cmake \
+  cmake-qt-gui \
+  ssh \
+  curl \
+  apt-utils \
+  pkg-config \
+  g++-multilib \
+  git \
+  libunwind-dev \
+  libfftw3-dev \
+  libelf-dev \
+  libncurses5-dev \
+  libpthread-stubs0-dev \
+  vim \
+  gfortran \
+  libboost-program-options-dev \
+  libssl-dev \
+  libboost-dev \
+  libboost-system-dev \
+  libboost-filesystem-dev \
+  rpm \
+  libnuma-dev \
+  virtualenv \
+  python-pip \
+  python3-pip \
+  wget && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+# Install rocm pkgs
+RUN apt-get update --allow-insecure-repositories && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
+    rocm-dev rocm-libs rocm-utils \
+    rocfft miopen-hip miopengemm rocblas hipblas rocrand \
+    rocm-profiler cxlactivitylogger && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
+RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb
+
+ENV HCC_HOME=$ROCM_PATH/hcc
+ENV HIP_PATH=$ROCM_PATH/hip
+ENV OPENCL_ROOT=$ROCM_PATH/opencl
+ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
+ENV PATH="$ROCM_PATH/bin:${PATH}"
+ENV PATH="$OPENCL_ROOT/bin:${PATH}"
+
+# Add target file to help determine which device(s) to build for
+RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst
+
+# Setup environment variables, and add those environment variables at the end of ~/.bashrc 
+ARG HCC_HOME=/opt/rocm/hcc
+ARG HIP_PATH=/opt/rocm/hip
+ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH
+
+# Copy and run the install scripts.
+COPY install/*.sh /install/
+ARG DEBIAN_FRONTEND=noninteractive
+RUN /install/install_bootstrap_deb_packages.sh
+RUN add-apt-repository -y ppa:openjdk-r/ppa && \
+    add-apt-repository -y ppa:george-edison55/cmake-3.x
+RUN /install/install_deb_packages.sh
+RUN /install/install_pip_packages.sh
+RUN /install/install_bazel.sh
+RUN /install/install_golang.sh
+
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
+
+# Configure the build for our CUDA configuration.
+ENV TF_NEED_ROCM 1
+
diff --git a/tensorflow/tools/ci_build/builds/docker_test.sh b/tensorflow/tools/ci_build/builds/docker_test.sh
index e337ea4b05..38891b60e5 100755
--- a/tensorflow/tools/ci_build/builds/docker_test.sh
+++ b/tensorflow/tools/ci_build/builds/docker_test.sh
@@ -19,7 +19,7 @@
 #
 # Usage: docker_test.sh <IMAGE_TYPE> <TAG> <WHL_PATH>
 # Arguments:
-#   IMAGE_TYPE : Type of the image: (CPU|GPU)
+#   IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM)
 #   TAG        : Docker image tag
 #   WHL_PATH   : Path to the whl file to be installed inside the docker image
 #
@@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then
   DOCKERFILE="tensorflow/tools/docker/Dockerfile"
 elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then
   DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu"
+elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then
+  DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm"
 else
   die "Unrecognized image type: $1"
 fi
@@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then
   devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
   libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
   GPU_EXTRA_PARAMS="${devices} ${libs}"
+elif [ "${IMAGE_TYPE}" == "rocm" ]; then
+  ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
 else
   GPU_EXTRA_PARAMS=""
+  ROCM_EXTRA_PARAMS=""
 fi
 
 # Run docker image with source directory mapped
 docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \
-${GPU_EXTRA_PARAMS} \
+${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \
 "${DOCKER_IMG_TAG}" \
 /bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\
 "tensorflow/tools/ci_build/builds/test_tutorials.sh && "\
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index fef121ab5a..6543779022 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}"
 PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
 GPU_FLAG=""
 if [[ ${CONTAINER_TYPE} == "cpu" ]] || \
+   [[ ${CONTAINER_TYPE} == "rocm" ]] || \
    [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then
   bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \
       die "Build failed."
@@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then
       die "ERROR: Cannot find repaired wheel."
     fi
   # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
-  elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then
+  elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \
+       [[ ${CONTAINER_TYPE} == "rocm" ]]; then
     WHL_PATH=${AUDITED_WHL_NAME}
     cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH}
     echo "Copied manylinx1 wheel file at ${WHL_PATH}"
diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user
index b216e3549f..1cc5aed15d 100755
--- a/tensorflow/tools/ci_build/builds/with_the_same_user
+++ b/tensorflow/tools/ci_build/builds/with_the_same_user
@@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
 usermod -a -G sudo "${CI_BUILD_USER}"
 echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
 
+if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then
+  # ROCm requires the video group in order to use the GPU for compute. If it
+  # exists on the host, add it to the container.
+  getent group video || addgroup video && adduser "${CI_BUILD_USER}" video
+fi
+
 if [ -e /root/.bazelrc ]; then
   cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
   chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc"
diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh
index 77265e0f50..eab0616513 100755
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@@ -18,7 +18,7 @@
 #                    <COMMAND>
 #
 # CONTAINER_TYPE: Type of the docker container used the run the build:
-#                 e.g., (cpu | gpu | android | tensorboard)
+#                 e.g., (cpu | gpu | rocm | android | tensorboard)
 #
 # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build.
 #                  If this optional value is not supplied (via the
@@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then
   GPU_EXTRA_PARAMS=""
 fi
 
+# Add extra params for rocm devices and libraries for ROCm container.
+if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then
+  ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
+else
+  ROCM_EXTRA_PARAMS=""
+fi
+
+
 # Determine the docker image name
 DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}"
 
@@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
     -v ${WORKSPACE}:/workspace \
     -w /workspace \
     ${GPU_EXTRA_PARAMS} \
+    ${ROCM_EXTRA_PARAMS} \
     ${CI_DOCKER_EXTRA_PARAMS[@]} \
     "${DOCKER_IMG_NAME}" \
     ${CI_COMMAND_PREFIX[@]} \
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
index 8eeddcdb82..3b5c92d148 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 # Only running cc tests, python version does not matter.
 export PYTHON_BIN_PATH=`which python`
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
index 8eca1987f0..52eff6330f 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python2`
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
index f6fa9251d4..d12027599a 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python3`
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
index 51eb2cd7e6..7c531a4d68 100755
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
@@ -26,6 +26,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python3`
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow.sh b/tensorflow/tools/ci_build/linux/libtensorflow.sh
index beef8e063b..3b6e15feb9 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow.sh
@@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-"
 if [ "${TF_NEED_CUDA}" == "1" ]; then
   SUFFIX="-gpu-linux-"
 fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+  SUFFIX="-rocm-linux-"
+fi
 
 build_libtensorflow_tarball "${SUFFIX}$(uname -m)"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
index 4bf34dd299..b76262b6e9 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
@@ -19,4 +19,5 @@
 set -ex
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 "${SCRIPT_DIR}/libtensorflow_docker.sh"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
index 60c974c36b..467b8dc808 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then
   DOCKER_BINARY="nvidia-docker"
   DOCKER_FILE="Dockerfile.gpu"
 fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+  DOCKER_IMAGE="tf-tensorflow-rocm"
+  DOCKER_BINARY="docker"
+  DOCKER_FILE="Dockerfile.rocm"
+fi
 
 docker build \
   -t "${DOCKER_IMAGE}" \
@@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \
   -e "TF_NEED_HDFS=0" \
   -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
   -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \
+  -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \
   -e "TF_NEED_OPENCL_SYCL=0" \
   "${DOCKER_IMAGE}" \
   "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
new file mode 100755
index 0000000000..c1ebbe3630
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to build a binary releases of libtensorflow with GPU support.
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+export TF_NEED_ROCM=1
+"${SCRIPT_DIR}/libtensorflow_docker.sh"
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
new file mode 100755
index 0000000000..200089f90e
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+    --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
new file mode 100755
index 0000000000..1d0b838c1b
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+    --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
index c7cc16e669..adee0d3171 100755
--- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
+++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
@@ -27,6 +27,7 @@ echo ""
 
 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=$(which python2)
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
index 9ae5fc6bea..06798adc03 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
index d95fcdeb85..95f1992d7d 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@@ -27,6 +27,7 @@ export TF_NEED_CUDA=1
 export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}"
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_HDFS=0
+export TF_NEED_ROCM=0
 export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
new file mode 100755
index 0000000000..aeabc0e39e
--- /dev/null
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to produce binary release of libtensorflow (C API, Java jars etc.).
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# See comments at the top of this file for details.
+source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
+
+# Configure script
+export TF_NEED_ROCM=1
+export PYTHON_BIN_PATH="/usr/bin/python"
+export TF_NEED_GCP=0
+export TF_NEED_HDFS=0
+export TF_NEED_CUDA=0
+export TF_NEED_OPENCL_SYCL=0
+export TF_NEED_MKL=0
+export COMPUTECPP_PATH="/usr/local"
+
+export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
+build_libtensorflow_tarball "-gpu-darwin-$(uname -m)"
diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
new file mode 100755
index 0000000000..a0de128020
--- /dev/null
+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
+
+bazel clean
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \
+    --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 \
+    --config=xla -- \
+    //tensorflow/compiler/...
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 1e7c5d6790..87d1243563 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -1,6 +1,7 @@
 # TensorFlow external dependencies that can be loaded in WORKSPACE files.
 
 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
+load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
 load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure")
 load("//third_party:nccl/nccl_configure.bzl", "nccl_configure")
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
@@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     sycl_configure(name = "local_config_sycl")
     syslibs_configure(name = "local_config_syslibs")
     python_configure(name = "local_config_python")
+    rocm_configure(name="local_config_rocm")
 
     initialize_third_party()
 
diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
new file mode 100644
index 0000000000..0e175b3ef6
--- /dev/null
+++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
@@ -0,0 +1,158 @@
+major_version: "local"
+minor_version: ""
+default_target_cpu: "same_as_host"
+
+default_toolchain {
+  cpu: "k8"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "piii"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "arm"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "ppc"
+  toolchain_identifier: "local_linux"
+}
+
+toolchain {
+  abi_version: "local"
+  abi_libc_version: "local"
+  builtin_sysroot: ""
+  compiler: "compiler"
+  host_system_name: "local"
+  needsPic: true
+  supports_gold_linker: false
+  supports_incremental_linker: false
+  supports_fission: false
+  supports_interface_shared_objects: false
+  supports_normalizing_ar: false
+  supports_start_end_lib: false
+  supports_thin_archives: false
+  target_libc: "local"
+  target_cpu: "local"
+  target_system_name: "local"
+  toolchain_identifier: "local_linux"
+
+  tool_path { name: "ar" path: "/usr/bin/ar" }
+  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
+  tool_path { name: "cpp" path: "/usr/bin/cpp" }
+  tool_path { name: "dwp" path: "/usr/bin/dwp" }
+  # As part of the TensorFlow release, we place some ROCm-related compilation
+  # files in @local_config_rocm//crosstool/clang/bin, and this relative
+  # path, combined with the rest of our Bazel configuration causes our
+  # compilation to use those files.
+  tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" }
+  # Use "-std=c++11" for hipcc. For consistency, force both the host compiler
+  # and the device compiler to use "-std=c++11".
+  cxx_flag: "-std=c++11"
+  linker_flag: "-Wl,-no-as-needed"
+  linker_flag: "-lstdc++"
+  #linker_flag: "-B/usr/bin/"
+  linker_flag: "-B/opt/rocm/hcc/compiler/bin"
+
+%{host_compiler_includes}
+  tool_path { name: "gcov" path: "/usr/bin/gcov" }
+
+  # C(++) compiles invoke the compiler (as that is the one knowing where
+  # to find libraries), but we provide LD so other rules can invoke the linker.
+  tool_path { name: "ld" path: "/usr/bin/ld" }
+
+  tool_path { name: "nm" path: "/usr/bin/nm" }
+  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
+  objcopy_embed_flag: "-I"
+  objcopy_embed_flag: "binary"
+  tool_path { name: "objdump" path: "/usr/bin/objdump" }
+  tool_path { name: "strip" path: "/usr/bin/strip" }
+
+  # Anticipated future default.
+  unfiltered_cxx_flag: "-no-canonical-prefixes"
+
+  # Make C++ compilation deterministic. Use linkstamping instead of these
+  # compiler symbols.
+  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+  unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__"
+  # The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers
+  # It needs to be always set when compiling Eigen headers
+  # (irrespective of whether the source file is being compiled via HIPCC)
+  # so adding -DEIGEN_USE_HIP as a default CXX flag here
+  unfiltered_cxx_flag: "-DEIGEN_USE_HIP"
+
+    
+  # Security hardening on by default.
+  # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+  # We need to undef it before redefining it as some distributions now have
+  # it enabled by default.
+  #compiler_flag: "-U_FORTIFY_SOURCE"
+  #compiler_flag: "-D_FORTIFY_SOURCE=1"
+  #compiler_flag: "-fstack-protector"
+  #compiler_flag: "-fPIE"
+  #linker_flag: "-pie"
+  #linker_flag: "-Wl,-z,relro,-z,now"
+
+  # Enable coloring even if there's no attached terminal. Bazel removes the
+  # escape sequences if --nocolor is specified. This isn't supported by gcc
+  # on Ubuntu 14.04.
+  # compiler_flag: "-fcolor-diagnostics"
+
+  # All warnings are enabled. Maybe enable -Werror as well?
+  compiler_flag: "-Wall"
+  # Enable a few more warnings that aren't part of -Wall.
+  compiler_flag: "-Wunused-but-set-parameter"
+  # But disable some that are problematic.
+  compiler_flag: "-Wno-free-nonheap-object" # has false positives
+
+  # Keep stack frames for debugging, even in opt mode.
+  compiler_flag: "-fno-omit-frame-pointer"
+
+  # Anticipated future default.
+  linker_flag: "-no-canonical-prefixes"
+  unfiltered_cxx_flag: "-fno-canonical-system-headers"
+  # Have gcc return the exit code from ld.
+  linker_flag: "-pass-exit-codes"
+  # Stamp the binary with a unique identifier.
+  linker_flag: "-Wl,--build-id=md5"
+  linker_flag: "-Wl,--hash-style=gnu"
+  # Gold linker only? Can we enable this by default?
+  # linker_flag: "-Wl,--warn-execstack"
+  # linker_flag: "-Wl,--detect-odr-violations"
+
+  # Include directory for ROCm headers.
+%{rocm_include_path}
+
+  compilation_mode_flags {
+    mode: DBG
+    # Enable debug symbols.
+    compiler_flag: "-g"
+  }
+  compilation_mode_flags {
+    mode: OPT
+
+    # No debug symbols.
+    # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
+    # even generally? However, that can't happen here, as it requires special
+    # handling in Bazel.
+    compiler_flag: "-g0"
+
+    # Conservative choice for -O
+    # -O3 can increase binary size and even slow down the resulting binaries.
+    # Profile first and / or use FDO if you need better performance than this.
+    compiler_flag: "-O2"
+
+    # Disable assertions
+    compiler_flag: "-DNDEBUG"
+
+    # Removal of unused code and data at link time (can this increase binary size in some cases?).
+    compiler_flag: "-ffunction-sections"
+    compiler_flag: "-fdata-sections"
+    linker_flag: "-Wl,--gc-sections"
+  }
+  linking_mode_flags { mode: DYNAMIC }
+}
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
new file mode 100755
index 0000000000..824238022b
--- /dev/null
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+"""Crosstool wrapper for compiling ROCm programs.
+
+SYNOPSIS:
+  crosstool_wrapper_driver_rocm [options passed in by cc_library()
+                                or cc_binary() rule]
+
+DESCRIPTION:
+  This script is expected to be called by the cc_library() or cc_binary() bazel
+  rules. When the option "-x rocm" is present in the list of arguments passed
+  to this script, it invokes the hipcc compiler. Most arguments are passed
+  as is as a string to --compiler-options of hipcc. When "-x rocm" is not
+  present, this wrapper invokes gcc with the input arguments as is.
+"""
+
+from __future__ import print_function
+
+__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)'
+
+from argparse import ArgumentParser
+import os
+import subprocess
+import re
+import sys
+import pipes
+
+# Template values set by rocm_configure.bzl.
+CPU_COMPILER = ('%{cpu_compiler}')
+GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
+
+HIPCC_PATH = '%{hipcc_path}'
+PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
+
+def Log(s):
+  print('gpus/crosstool: {0}'.format(s))
+
+
+def GetOptionValue(argv, option):
+  """Extract the list of values for option from the argv list.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    option: The option whose value to extract, without the leading '-'.
+
+  Returns:
+    A list of values, either directly following the option,
+    (eg., -opt val1 val2) or values collected from multiple occurrences of
+    the option (eg., -opt val1 -opt val2).
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-' + option, nargs='*', action='append')
+  args, _ = parser.parse_known_args(argv)
+  if not args or not vars(args)[option]:
+    return []
+  else:
+    return sum(vars(args)[option], [])
+
+
+def GetHostCompilerOptions(argv):
+  """Collect the -isystem, -iquote, and --sysroot option values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be used as the --compiler-options to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-isystem', nargs='*', action='append')
+  parser.add_argument('-iquote', nargs='*', action='append')
+  parser.add_argument('--sysroot', nargs=1)
+  parser.add_argument('-g', nargs='*', action='append')
+  parser.add_argument('-fno-canonical-system-headers', action='store_true')
+
+  args, _ = parser.parse_known_args(argv)
+
+  opts = ''
+
+  if args.isystem:
+    opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, []))
+  if args.iquote:
+    opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
+  if args.g:
+    opts += ' -g' + ' -g'.join(sum(args.g, []))
+  #if args.fno_canonical_system_headers:
+  #  opts += ' -fno-canonical-system-headers'
+  if args.sysroot:
+    opts += ' --sysroot ' + args.sysroot[0]
+
+  return opts
+
+def GetHipccOptions(argv):
+  """Collect the -hipcc_options values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be passed directly to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-hipcc_options', nargs='*', action='append')
+
+  args, _ = parser.parse_known_args(argv)
+
+  if args.hipcc_options:
+    options = _update_options(sum(args.hipcc_options, []))
+    return ' '.join(['--'+a for a in options])
+  return ''
+
+
+def InvokeHipcc(argv, log=False):
+  """Call hipcc with arguments assembled from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    log: True if logging is requested.
+
+  Returns:
+    The return value of calling os.system('hipcc ' + args)
+  """
+
+  host_compiler_options = GetHostCompilerOptions(argv)
+  hipcc_compiler_options = GetHipccOptions(argv)
+  opt_option = GetOptionValue(argv, 'O')
+  m_options = GetOptionValue(argv, 'm')
+  m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
+  include_options = GetOptionValue(argv, 'I')
+  out_file = GetOptionValue(argv, 'o')
+  depfiles = GetOptionValue(argv, 'MF')
+  defines = GetOptionValue(argv, 'D')
+  defines = ''.join([' -D' + define for define in defines])
+  undefines = GetOptionValue(argv, 'U')
+  undefines = ''.join([' -U' + define for define in undefines])
+  std_options = GetOptionValue(argv, 'std')
+  hipcc_allowed_std_options = ["c++11"]
+  std_options = ''.join([' -std=' + define
+      for define in std_options if define in hipcc_allowed_std_options])
+
+  # The list of source files get passed after the -c option. I don't know of
+  # any other reliable way to just get the list of source files to be compiled.
+  src_files = GetOptionValue(argv, 'c')
+
+  if len(src_files) == 0:
+    return 1
+  if len(out_file) != 1:
+    return 1
+
+  opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
+         else ' -g')
+
+  includes = (' -I ' + ' -I '.join(include_options)
+              if len(include_options) > 0
+              else '')
+
+  # Unfortunately, there are other options that have -c prefix too.
+  # So allowing only those look like C/C++ files.
+  src_files = [f for f in src_files if
+               re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
+  srcs = ' '.join(src_files)
+  out = ' -o ' + out_file[0]
+
+  hipccopts = ' '
+  hipccopts += ' ' + hipcc_compiler_options
+  hipccopts += undefines
+  hipccopts += defines
+  hipccopts += std_options
+  hipccopts += m_options
+
+  if depfiles:
+    # Generate the dependency file
+    depfile = depfiles[0]
+    cmd = (HIPCC_PATH + ' ' + hipccopts +
+           host_compiler_options +
+           ' ' + GCC_HOST_COMPILER_PATH +
+           ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
+    if log: Log(cmd)
+    exit_status = os.system(cmd)
+    if exit_status != 0:
+      return exit_status
+
+  cmd = (HIPCC_PATH + ' ' + hipccopts +
+         host_compiler_options + ' -fPIC' +
+         ' ' + GCC_HOST_COMPILER_PATH +
+         ' -I .' + opt + includes + ' -c ' + srcs + out)
+
+  # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
+  # Need to investigate and fix.
+  cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
+  if log: Log(cmd)
+  return os.system(cmd)
+
+
+def main():
+  # ignore PWD env var
+  os.environ['PWD']=''
+
+  parser = ArgumentParser()
+  parser.add_argument('-x', nargs=1)
+  parser.add_argument('--rocm_log', action='store_true')
+  parser.add_argument('-pass-exit-codes', action='store_true')
+  args, leftover = parser.parse_known_args(sys.argv[1:])
+
+  if args.x and args.x[0] == 'rocm':
+    if args.rocm_log: Log('-x rocm')
+    leftover = [pipes.quote(s) for s in leftover]
+    if args.rocm_log: Log('using hipcc')
+    return InvokeHipcc(leftover, log=args.rocm_log)
+
+  # XXX use hipcc to link
+  if args.pass_exit_codes:
+    gpu_compiler_flags = [flag for flag in sys.argv[1:]
+                               if not flag.startswith(('-pass-exit-codes'))]
+
+    # special handling for $ORIGIN
+    # - guard every argument with ''
+    modified_gpu_compiler_flags = []
+    for flag in gpu_compiler_flags:
+      modified_gpu_compiler_flags.append("'" + flag + "'")
+
+    if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags)))
+    return subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags)
+
+  # Strip our flags before passing through to the CPU compiler for files which
+  # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
+  # We not only want to pass -x to the CPU compiler, but also keep it in its
+  # relative location in the argv list (the compiler is actually sensitive to
+  # this).
+  cpu_compiler_flags = [flag for flag in sys.argv[1:]
+                             if not flag.startswith(('--rocm_log'))]
+
+  # XXX: SE codes need to be built with gcc, but need this macro defined
+  cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
+
+  return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/third_party/gpus/rocm/BUILD b/third_party/gpus/rocm/BUILD
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl
new file mode 100644
index 0000000000..8258bb3589
--- /dev/null
+++ b/third_party/gpus/rocm/BUILD.tpl
@@ -0,0 +1,99 @@
+licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
+
+package(default_visibility = ["//visibility:public"])
+
+config_setting(
+    name = "using_hipcc",
+    values = {
+        "define": "using_rocm_hipcc=true",
+    },
+)
+
+cc_library(
+    name = "rocm_headers",
+    hdrs = [
+        "rocm/rocm_config.h",
+        %{rocm_headers}
+    ],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "hip",
+    srcs = ["rocm/lib/%{hip_lib}"],
+    data = ["rocm/lib/%{hip_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocblas",
+    srcs = ["rocm/lib/%{rocblas_lib}"],
+    data = ["rocm/lib/%{rocblas_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocfft",
+    srcs = ["rocm/lib/%{rocfft_lib}"],
+    data = ["rocm/lib/%{rocfft_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "hiprand",
+    srcs = ["rocm/lib/%{hiprand_lib}"],
+    data = ["rocm/lib/%{hiprand_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+        "rocm/include/rocrand",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "miopen",
+    srcs = ["rocm/lib/%{miopen_lib}"],
+    data = ["rocm/lib/%{miopen_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocm",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":rocm_headers",
+        ":hip",
+        ":rocblas",
+        ":rocfft",
+        ":hiprand",
+        ":miopen",
+    ],
+)
+
+%{rocm_include_genrules}
diff --git a/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/gpus/rocm/build_defs.bzl.tpl
new file mode 100644
index 0000000000..306f57551f
--- /dev/null
+++ b/third_party/gpus/rocm/build_defs.bzl.tpl
@@ -0,0 +1,32 @@
+# Macros for building ROCm code.
+def if_rocm(if_true, if_false = []):
+    """Shorthand for select()'ing on whether we're building with ROCm.
+
+    Returns a select statement which evaluates to if_true if we're building
+    with ROCm enabled.  Otherwise, the select statement evaluates to if_false.
+
+    """
+    return select({
+        "@local_config_rocm//rocm:using_hipcc": if_true,
+        "//conditions:default": if_false
+    })
+
+
+def rocm_default_copts():
+    """Default options for all ROCm compilations."""
+    return if_rocm(["-x", "rocm"] + %{rocm_extra_copts})
+
+
+def rocm_is_configured():
+    """Returns true if ROCm was enabled during the configure process."""
+    return %{rocm_is_configured}
+
+def if_rocm_is_configured(x):
+    """Tests if the ROCm was enabled during the configure process.
+
+    Unlike if_rocm(), this does not require that we are building with
+    --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries.
+    """
+    if rocm_is_configured():
+      return x
+    return []
diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl
new file mode 100644
index 0000000000..c5f25a845c
--- /dev/null
+++ b/third_party/gpus/rocm/rocm_config.h.tpl
@@ -0,0 +1,21 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef ROCM_ROCM_CONFIG_H_
+#define ROCM_ROCM_CONFIG_H_
+
+#define TF_ROCM_TOOLKIT_PATH "/opt/rocm"
+
+#endif  // ROCM_ROCM_CONFIG_H_
diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
new file mode 100644
index 0000000000..9371e33f97
--- /dev/null
+++ b/third_party/gpus/rocm_configure.bzl
@@ -0,0 +1,663 @@
+# -*- Python -*-
+"""Repository rule for ROCm autoconfiguration.
+
+`rocm_configure` depends on the following environment variables:
+
+  * `TF_NEED_ROCM`: Whether to enable building with ROCm.
+  * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
+  * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is
+    `/opt/rocm`.
+  * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then
+    use the system default.
+  * `TF_MIOPEN_VERSION`: The version of the MIOpen library.
+  * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is
+    `gfx803,gfx900`.
+"""
+
+_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH"
+_TF_ROCM_VERSION = "TF_ROCM_VERSION"
+_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION"
+_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
+_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
+
+_DEFAULT_ROCM_VERSION = ""
+_DEFAULT_MIOPEN_VERSION = ""
+_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm"
+_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"]
+
+def find_cc(repository_ctx):
+  """Find the C++ compiler."""
+  # Return a dummy value for GCC detection here to avoid error
+  target_cc_name = "gcc"
+  cc_path_envvar = _GCC_HOST_COMPILER_PATH
+  cc_name = target_cc_name
+
+  if cc_path_envvar in repository_ctx.os.environ:
+    cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+    if cc_name_from_env:
+      cc_name = cc_name_from_env
+  if cc_name.startswith("/"):
+    # Absolute path, maybe we should make this supported by our which function.
+    return cc_name
+  cc = repository_ctx.which(cc_name)
+  if cc == None:
+    fail(("Cannot find {}, either correct your path or set the {}" +
+          " environment variable").format(target_cc_name, cc_path_envvar))
+  return cc
+
+_INC_DIR_MARKER_BEGIN = "#include <...>"
+
+def _cxx_inc_convert(path):
+  """Convert path returned by cc -E xc++ in a complete path."""
+  path = path.strip()
+  return path
+
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+  """Compute the list of default C or C++ include directories."""
+  if lang_is_cpp:
+    lang = "c++"
+  else:
+    lang = "c"
+  # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
+  #       but in rocm_clang CROSSTOOL file that is a `feature` and we should
+  #       handle the case when it's disabled and no flag is passed
+  result = repository_ctx.execute([cc, "-no-canonical-prefixes",
+                                   "-E", "-x" + lang, "-", "-v"])
+  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+  if index1 == -1:
+    return []
+  index1 = result.stderr.find("\n", index1)
+  if index1 == -1:
+    return []
+  index2 = result.stderr.rfind("\n ")
+  if index2 == -1 or index2 < index1:
+    return []
+  index2 = result.stderr.find("\n", index2 + 1)
+  if index2 == -1:
+    inc_dirs = result.stderr[index1 + 1:]
+  else:
+    inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+  return [str(repository_ctx.path(_cxx_inc_convert(p)))
+          for p in inc_dirs.split("\n")]
+
+def get_cxx_inc_directories(repository_ctx, cc):
+  """Compute the list of default C and C++ include directories."""
+  # For some reason `clang -xc` sometimes returns include paths that are
+  # different from the ones from `clang -xc++`. (Symlink and a dir)
+  # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+  includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+  includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+  includes_cpp_set = depset(includes_cpp)
+  return includes_cpp + [inc for inc in includes_c
+                         if inc not in includes_cpp_set]
+
+def auto_configure_fail(msg):
+  """Output failure message when rocm configuration fails."""
+  red = "\033[0;31m"
+  no_color = "\033[0m"
+  fail("\n%sROCm Configuration Error:%s %s\n" % (red, no_color, msg))
+# END cc_configure common functions (see TODO above).
+
+def _host_compiler_includes(repository_ctx, cc):
+  """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+
+  Args:
+    repository_ctx: The repository context.
+    cc: The path to the gcc host compiler.
+
+  Returns:
+    A string containing the cxx_builtin_include_directory for each of the gcc
+    host compiler include directories, which can be added to the CROSSTOOL
+    file.
+  """
+  inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+
+  # Add numpy headers
+  inc_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include")
+
+  entries = []
+  for inc_dir in inc_dirs:
+    entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+
+  # define TENSORFLOW_USE_ROCM
+  entries.append("  unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"")
+
+  return "\n".join(entries)
+
+def _rocm_include_path(repository_ctx, rocm_config):
+  """Generates the cxx_builtin_include_directory entries for rocm inc dirs.
+
+  Args:
+    repository_ctx: The repository context.
+    cc: The path to the gcc host compiler.
+
+  Returns:
+    A string containing the cxx_builtin_include_directory for each of the gcc
+    host compiler include directories, which can be added to the CROSSTOOL
+    file.
+  """
+  inc_dirs = []
+
+  # general ROCm include path
+  inc_dirs.append(rocm_config.rocm_toolkit_path + '/include')
+
+  # Add HSA headers
+  inc_dirs.append("/opt/rocm/hsa/include")
+
+  # Add HIP headers
+  inc_dirs.append("/opt/rocm/include/hip")
+  inc_dirs.append("/opt/rocm/include/hip/hcc_detail")
+
+  # Add rocrand and hiprand headers
+  inc_dirs.append("/opt/rocm/rocrand/include")
+  inc_dirs.append("/opt/rocm/hiprand/include")
+
+  # Add rocfft headers
+  inc_dirs.append("/opt/rocm/rocfft/include")
+
+  # Add rocBLAS headers
+  inc_dirs.append("/opt/rocm/rocblas/include")
+
+  # Add MIOpen headers
+  inc_dirs.append("/opt/rocm/miopen/include")
+
+  # Add hcc headers
+  inc_dirs.append("/opt/rocm/hcc/include")
+  inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/")
+  inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include")
+  # Newer hcc builds use/are based off of clang 8.0.0.
+  inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/")
+  inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include")
+
+  inc_entries = []
+  for inc_dir in inc_dirs:
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+  return "\n".join(inc_entries)
+
+def _enable_rocm(repository_ctx):
+  if "TF_NEED_ROCM" in repository_ctx.os.environ:
+    enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip()
+    return enable_rocm == "1"
+  return False
+
+def _rocm_toolkit_path(repository_ctx):
+  """Finds the rocm toolkit directory.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A speculative real path of the rocm toolkit install directory.
+  """
+  rocm_toolkit_path = _DEFAULT_ROCM_TOOLKIT_PATH
+  if _ROCM_TOOLKIT_PATH in repository_ctx.os.environ:
+    rocm_toolkit_path = repository_ctx.os.environ[_ROCM_TOOLKIT_PATH].strip()
+  if not repository_ctx.path(rocm_toolkit_path).exists:
+    auto_configure_fail("Cannot find rocm toolkit path.")
+  return str(repository_ctx.path(rocm_toolkit_path).realpath)
+
+def _amdgpu_targets(repository_ctx):
+  """Returns a list of strings representing AMDGPU targets."""
+  if _TF_ROCM_AMDGPU_TARGETS not in repository_ctx.os.environ:
+    return _DEFAULT_ROCM_AMDGPU_TARGETS
+  amdgpu_targets_str = repository_ctx.os.environ[_TF_ROCM_AMDGPU_TARGETS]
+  amdgpu_targets = amdgpu_targets_str.split(",")
+  for amdgpu_target in amdgpu_targets:
+    if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit():
+      auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target)
+  return amdgpu_targets
+
+def _cpu_value(repository_ctx):
+  """Returns the name of the host operating system.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A string containing the name of the host operating system.
+  """
+  os_name = repository_ctx.os.name.lower()
+  if os_name.startswith("mac os"):
+    return "Darwin"
+  if os_name.find("windows") != -1:
+    return "Windows"
+  result = repository_ctx.execute(["uname", "-s"])
+  return result.stdout.strip()
+
+def _lib_name(lib, cpu_value, version="", static=False):
+  """Constructs the platform-specific name of a library.
+
+  Args:
+    lib: The name of the library, such as "hip"
+    cpu_value: The name of the host operating system.
+    version: The version of the library.
+    static: True the library is static or False if it is a shared object.
+
+  Returns:
+    The platform-specific name of the library.
+  """
+  if cpu_value in ("Linux"):
+    if static:
+      return "lib%s.a" % lib
+    else:
+      if version:
+        version = ".%s" % version
+      return "lib%s.so%s" % (lib, version)
+  elif cpu_value == "Windows":
+      return "%s.lib" % lib
+  elif cpu_value == "Darwin":
+      if static:
+          return "lib%s.a" % lib
+      elif version:
+          version = ".%s" % version
+      return "lib%s%s.dylib" % (lib, version)
+  else:
+    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
+def _find_rocm_lib(lib, repository_ctx, cpu_value, basedir, version="",
+                   static=False):
+  """Finds the given ROCm libraries on the system.
+
+  Args:
+    lib: The name of the library, such as "hip"
+    repository_ctx: The repository context.
+    cpu_value: The name of the host operating system.
+    basedir: The install directory of ROCm.
+    version: The version of the library.
+    static: True if static library, False if shared object.
+
+  Returns:
+    Returns a struct with the following fields:
+      file_name: The basename of the library found on the system.
+      path: The full path to the library.
+  """
+  file_name = _lib_name(lib, cpu_value, version, static)
+  if cpu_value == "Linux":
+    path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+    path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+    path = repository_ctx.path(
+        "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+
+  path = repository_ctx.path("%s/lib/%s" % (basedir, file_name))
+  if path.exists:
+    return struct(file_name=file_name, path=str(path.realpath))
+  path = repository_ctx.path("%s/%s" % (basedir, file_name))
+  if path.exists:
+    return struct(file_name=file_name, path=str(path.realpath))
+
+  auto_configure_fail("Cannot find rocm library %s" % file_name)
+
+def _find_libs(repository_ctx, rocm_config):
+  """Returns the ROCm libraries on the system.
+
+  Args:
+    repository_ctx: The repository context.
+    rocm_config: The ROCm config as returned by _get_rocm_config
+
+  Returns:
+    Map of library names to structs of filename and path as returned by
+    _find_rocm_lib.
+  """
+  cpu_value = rocm_config.cpu_value
+  return {
+      "hip": _find_rocm_lib(
+          "hip_hcc", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path),
+      "rocblas": _find_rocm_lib(
+          "rocblas", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocblas"),
+      "rocfft": _find_rocm_lib(
+          "rocfft", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocfft"),
+      "hiprand": _find_rocm_lib(
+          "hiprand", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/hiprand"),
+      "miopen": _find_rocm_lib(
+          "MIOpen", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/miopen"),
+  }
+
+def _get_rocm_config(repository_ctx):
+  """Detects and returns information about the ROCm installation on the system.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A struct containing the following fields:
+      rocm_toolkit_path: The ROCm toolkit installation directory.
+      amdgpu_targets: A list of the system's AMDGPU targets.
+      cpu_value: The name of the host operating system.
+  """
+  cpu_value = _cpu_value(repository_ctx)
+  rocm_toolkit_path = _rocm_toolkit_path(repository_ctx)
+  return struct(
+      rocm_toolkit_path = rocm_toolkit_path,
+      amdgpu_targets = _amdgpu_targets(repository_ctx),
+      cpu_value = cpu_value)
+
+def _tpl(repository_ctx, tpl, substitutions={}, out=None):
+  if not out:
+    out = tpl.replace(":", "/")
+  repository_ctx.template(
+      out,
+      Label("//third_party/gpus/%s.tpl" % tpl),
+      substitutions)
+
+
+def _file(repository_ctx, label):
+  repository_ctx.template(
+      label.replace(":", "/"),
+      Label("//third_party/gpus/%s.tpl" % label),
+      {})
+
+
+_DUMMY_CROSSTOOL_BZL_FILE = """
+def error_gpu_disabled():
+  fail("ERROR: Building with --config=rocm but TensorFlow is not configured " +
+       "to build with GPU support. Please re-run ./configure and enter 'Y' " +
+       "at the prompt to build with GPU support.")
+
+  native.genrule(
+      name = "error_gen_crosstool",
+      outs = ["CROSSTOOL"],
+      cmd = "echo 'Should not be run.' && exit 1",
+  )
+
+  native.filegroup(
+      name = "crosstool",
+      srcs = [":CROSSTOOL"],
+      output_licenses = ["unencumbered"],
+  )
+"""
+
+
+_DUMMY_CROSSTOOL_BUILD_FILE = """
+load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
+
+error_gpu_disabled()
+"""
+
+def _create_dummy_repository(repository_ctx):
+  cpu_value = _cpu_value(repository_ctx)
+
+  # Set up BUILD file for rocm/.
+  _tpl(repository_ctx, "rocm:build_defs.bzl",
+       {
+           "%{rocm_is_configured}": "False",
+           "%{rocm_extra_copts}": "[]"
+       })
+  _tpl(repository_ctx, "rocm:BUILD",
+       {
+           "%{hip_lib}": _lib_name("hip", cpu_value),
+           "%{rocblas_lib}": _lib_name("rocblas", cpu_value),
+           "%{miopen_lib}": _lib_name("miopen", cpu_value),
+           "%{rocfft_lib}": _lib_name("rocfft", cpu_value),
+           "%{hiprand_lib}": _lib_name("hiprand", cpu_value),
+           "%{rocm_include_genrules}": '',
+           "%{rocm_headers}": '',
+       })
+
+  # Create dummy files for the ROCm toolkit since they are still required by
+  # tensorflow/core/platform/default/build_config:rocm.
+  repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "")
+
+  # Set up rocm_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(repository_ctx, "rocm:rocm_config.h",
+       {
+           "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH,
+       }, "rocm/rocm/rocm_config.h")
+
+  # If rocm_configure is not configured to build with GPU support, and the user
+  # attempts to build with --config=rocm, add a dummy build rule to intercept
+  # this and fail with an actionable error message.
+  repository_ctx.file("crosstool/error_gpu_disabled.bzl",
+                      _DUMMY_CROSSTOOL_BZL_FILE)
+  repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
+
+def _execute(repository_ctx, cmdline, error_msg=None, error_details=None,
+             empty_stdout_fine=False):
+  """Executes an arbitrary shell command.
+
+  Args:
+    repository_ctx: the repository_ctx object
+    cmdline: list of strings, the command to execute
+    error_msg: string, a summary of the error if the command fails
+    error_details: string, details about the error or steps to fix it
+    empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
+      it's an error
+  Return:
+    the result of repository_ctx.execute(cmdline)
+  """
+  result = repository_ctx.execute(cmdline)
+  if result.stderr or not (empty_stdout_fine or result.stdout):
+    auto_configure_fail(
+        "\n".join([
+            error_msg.strip() if error_msg else "Repository command failed",
+            result.stderr.strip(),
+            error_details if error_details else ""]))
+  return result
+
+def _norm_path(path):
+  """Returns a path with '/' and remove the trailing slash."""
+  path = path.replace("\\", "/")
+  if path[-1] == "/":
+    path = path[:-1]
+  return path
+
+def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
+    src_files = [], dest_files = []):
+  """Returns a genrule to symlink(or copy if on Windows) a set of files.
+
+  If src_dir is passed, files will be read from the given directory; otherwise
+  we assume files are in src_files and dest_files
+  """
+  if src_dir != None:
+    src_dir = _norm_path(src_dir)
+    dest_dir = _norm_path(dest_dir)
+    files = _read_dir(repository_ctx, src_dir)
+    # Create a list with the src_dir stripped to use for outputs.
+    dest_files = files.replace(src_dir, '').splitlines()
+    src_files = files.splitlines()
+  command = []
+  # We clear folders that might have been generated previously to avoid
+  # undesired inclusions
+  command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
+  command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
+  outs = []
+  for i in range(len(dest_files)):
+    if dest_files[i] != "":
+      # If we have only one file to link we do not want to use the dest_dir, as
+      # $(@D) will include the full path to the file.
+      dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i]
+      # On Windows, symlink is not supported, so we just copy all the files.
+      cmd = 'ln -s'
+      command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest))
+      outs.append('        "' + dest_dir + dest_files[i] + '",')
+  genrule = _genrule(src_dir, genrule_name, " && ".join(command),
+                     "\n".join(outs))
+  return genrule
+
+def _genrule(src_dir, genrule_name, command, outs):
+  """Returns a string with a genrule.
+
+  Genrule executes the given command and produces the given outputs.
+  """
+  return (
+      'genrule(\n' +
+      '    name = "' +
+      genrule_name + '",\n' +
+      '    outs = [\n' +
+      outs +
+      '\n    ],\n' +
+      '    cmd = """\n' +
+      command +
+      '\n   """,\n' +
+      ')\n'
+  )
+
+def _read_dir(repository_ctx, src_dir):
+  """Returns a string with all files in a directory.
+
+  Finds all files inside a directory, traversing subfolders and following
+  symlinks. The returned string contains the full path of all files
+  separated by line breaks.
+  """
+  find_result = _execute(
+      repository_ctx, ["find", src_dir, "-follow", "-type", "f"],
+      empty_stdout_fine=True)
+  result = find_result.stdout
+  return result
+
+def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
+  if False:
+    amdgpu_target_flags = ["--amdgpu-target=" +
+        amdgpu_target for amdgpu_target in amdgpu_targets]
+  else:
+    # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc"
+    amdgpu_target_flags = []
+  return str(amdgpu_target_flags)
+
+def _create_local_rocm_repository(repository_ctx):
+  """Creates the repository containing files set up to build with ROCm."""
+  rocm_config = _get_rocm_config(repository_ctx)
+
+  # Set up symbolic links for the rocm toolkit by creating genrules to do
+  # symlinking. We create one genrule for each directory we want to track under
+  # rocm_toolkit_path
+  rocm_toolkit_path = rocm_config.rocm_toolkit_path
+  rocm_include_path = rocm_toolkit_path + "/include"
+  genrules = [_symlink_genrule_for_dir(repository_ctx,
+      rocm_include_path, "rocm/include", "rocm-include")]
+  genrules.append(_symlink_genrule_for_dir(repository_ctx,
+      rocm_toolkit_path + "/rocfft/include", "rocm/include/rocfft", "rocfft-include"))
+  genrules.append(_symlink_genrule_for_dir(repository_ctx,
+      rocm_toolkit_path + "/rocblas/include", "rocm/include/rocblas", "rocblas-include"))
+  genrules.append(_symlink_genrule_for_dir(repository_ctx,
+      rocm_toolkit_path + "/miopen/include", "rocm/include/miopen", "miopen-include"))
+
+  rocm_libs = _find_libs(repository_ctx, rocm_config)
+  rocm_lib_src = []
+  rocm_lib_dest = []
+  for lib in rocm_libs.values():
+    rocm_lib_src.append(lib.path)
+    rocm_lib_dest.append("rocm/lib/" + lib.file_name)
+  genrules.append(_symlink_genrule_for_dir(repository_ctx, None, "", "rocm-lib",
+                                       rocm_lib_src, rocm_lib_dest))
+
+  included_files = _read_dir(repository_ctx, rocm_include_path).replace(
+      rocm_include_path, '').splitlines()
+
+  # Set up BUILD file for rocm/
+  _tpl(repository_ctx, "rocm:build_defs.bzl",
+       {
+           "%{rocm_is_configured}": "True",
+           "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+               repository_ctx, rocm_config.amdgpu_targets),
+
+       })
+  _tpl(repository_ctx, "rocm:BUILD",
+       {
+           "%{hip_lib}": rocm_libs["hip"].file_name,
+           "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
+           "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
+           "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
+           "%{miopen_lib}": rocm_libs["miopen"].file_name,
+           "%{rocm_include_genrules}": "\n".join(genrules),
+           "%{rocm_headers}": ('":rocm-include",\n' +
+                               '":rocfft-include",\n' +
+                               '":rocblas-include",\n' +
+                               '":miopen-include",'),
+       })
+  # Set up crosstool/
+  _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
+  cc = find_cc(repository_ctx)
+  host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
+  rocm_defines = {
+           "%{rocm_include_path}": _rocm_include_path(repository_ctx,
+                                                      rocm_config),
+           "%{host_compiler_includes}": host_compiler_includes,
+           "%{clang_path}": str(cc),
+       }
+
+  _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out="crosstool/CROSSTOOL")
+
+  _tpl(repository_ctx,
+       "crosstool:clang/bin/crosstool_wrapper_driver_rocm",
+       {
+           "%{cpu_compiler}": str(cc),
+           "%{hipcc_path}": "/opt/rocm/bin/hipcc",
+           "%{gcc_host_compiler_path}": str(cc),
+           "%{rocm_amdgpu_targets}": ",".join(
+               ["\"%s\"" % c for c in rocm_config.amdgpu_targets]),
+       })
+
+  # Set up rocm_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(repository_ctx, "rocm:rocm_config.h",
+       {
+           "%{rocm_amdgpu_targets}": ",".join(
+               ["\"%s\"" % c for c in rocm_config.amdgpu_targets]),
+           "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path,
+       }, "rocm/rocm/rocm_config.h")
+
+
+def _create_remote_rocm_repository(repository_ctx, remote_config_repo):
+  """Creates pointers to a remotely configured repo set up to build with ROCm."""
+  _tpl(repository_ctx, "rocm:build_defs.bzl",
+       {
+           "%{rocm_is_configured}": "True",
+           "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+               repository_ctx, #_compute_capabilities(repository_ctx)
+            ),
+
+       })
+  _tpl(repository_ctx, "rocm:remote.BUILD",
+       {
+           "%{remote_rocm_repo}": remote_config_repo,
+       }, "rocm/BUILD")
+  _tpl(repository_ctx, "crosstool:remote.BUILD", {
+           "%{remote_rocm_repo}": remote_config_repo,
+       }, "crosstool/BUILD")
+
+def _rocm_autoconf_impl(repository_ctx):
+  """Implementation of the rocm_autoconf repository rule."""
+  if not _enable_rocm(repository_ctx):
+    _create_dummy_repository(repository_ctx)
+  else:
+    if _TF_ROCM_CONFIG_REPO in repository_ctx.os.environ:
+      _create_remote_rocm_repository(repository_ctx,
+          repository_ctx.os.environ[_TF_ROCM_CONFIG_REPO])
+    else:
+      _create_local_rocm_repository(repository_ctx)
+
+
+rocm_configure = repository_rule(
+    implementation = _rocm_autoconf_impl,
+    environ = [
+        _GCC_HOST_COMPILER_PATH,
+        "TF_NEED_ROCM",
+        _ROCM_TOOLKIT_PATH,
+        _TF_ROCM_VERSION,
+        _TF_MIOPEN_VERSION,
+        _TF_ROCM_AMDGPU_TARGETS,
+        _TF_ROCM_CONFIG_REPO,
+    ],
+)
+
+"""Detects and configures the local ROCm toolchain.
+
+Add the following to your WORKSPACE FILE:
+
+```python
+rocm_configure(name = "local_config_rocm")
+```
+
+Args:
+  name: A unique name for this workspace rule.
+"""
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 601e07ffdd..afc5cf56ab 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -42,6 +42,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld'
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
 
+build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
+build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
+
 build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true
 
-- 
cgit v1.2.3