From 69d3b8faf41791834301a74a05e288964940427d Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Fri, 22 Jun 2018 23:09:43 -0500 Subject: [ROCm] bazel build system and continuous integration logic This commit contains the following components to support TensorFlow on the ROCm platform: - bazel build system - continuous integration logic Authors: - Jack Chung: jack.chung@amd.com - Jeffrey Poznanovic: Jeffrey.Poznanovic@amd.com - Peng Sun: Peng.Sun@amd.com --- configure.py | 20 + tensorflow/core/BUILD | 4 +- tensorflow/core/kernels/BUILD | 3 +- tensorflow/tensorflow.bzl | 67 ++- tensorflow/tools/ci_build/Dockerfile.rocm | 97 +++ tensorflow/tools/ci_build/builds/docker_test.sh | 9 +- tensorflow/tools/ci_build/builds/pip.sh | 4 +- .../tools/ci_build/builds/with_the_same_user | 6 + tensorflow/tools/ci_build/ci_build.sh | 11 +- tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh | 1 + .../tools/ci_build/linux/cpu/run_py2_core.sh | 1 + .../tools/ci_build/linux/cpu/run_py3_contrib.sh | 1 + .../tools/ci_build/linux/cpu/run_py3_core.sh | 1 + tensorflow/tools/ci_build/linux/libtensorflow.sh | 3 + .../tools/ci_build/linux/libtensorflow_cpu.sh | 1 + .../tools/ci_build/linux/libtensorflow_docker.sh | 6 + .../tools/ci_build/linux/libtensorflow_rocm.sh | 22 + .../tools/ci_build/linux/rocm/run_cc_core.sh | 39 ++ .../tools/ci_build/linux/rocm/run_py3_core.sh | 39 ++ .../tools/ci_build/osx/cpu/run_py2_cc_core.sh | 1 + tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh | 1 + tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh | 1 + .../tools/ci_build/osx/libtensorflow_rocm.sh | 36 ++ .../tools/ci_build/xla/linux/rocm/run_py3.sh | 41 ++ tensorflow/workspace.bzl | 2 + third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl | 158 +++++ .../clang/bin/crosstool_wrapper_driver_rocm.tpl | 241 ++++++++ third_party/gpus/rocm/BUILD | 0 third_party/gpus/rocm/BUILD.tpl | 99 +++ third_party/gpus/rocm/build_defs.bzl.tpl | 32 + third_party/gpus/rocm/rocm_config.h.tpl | 21 + third_party/gpus/rocm_configure.bzl 
| 663 +++++++++++++++++++++ tools/bazel.rc | 3 + 33 files changed, 1611 insertions(+), 23 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.rocm create mode 100755 tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh create mode 100755 tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh create mode 100755 tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh create mode 100644 third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl create mode 100755 third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl create mode 100644 third_party/gpus/rocm/BUILD create mode 100644 third_party/gpus/rocm/BUILD.tpl create mode 100644 third_party/gpus/rocm/build_defs.bzl.tpl create mode 100644 third_party/gpus/rocm/rocm_config.h.tpl create mode 100644 third_party/gpus/rocm_configure.bzl diff --git a/configure.py b/configure.py index 361bd4764d..4f998511aa 100644 --- a/configure.py +++ b/configure.py @@ -1521,6 +1521,13 @@ def main(): else: set_trisycl_include_dir(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False) + if (environ_cp.get('TF_NEED_ROCM') == '1' and + 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( + 'LD_LIBRARY_PATH') != '1'): + write_action_env_to_bazelrc('LD_LIBRARY_PATH', + environ_cp.get('LD_LIBRARY_PATH')) + set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): @@ -1561,6 +1568,19 @@ def main(): write_to_bazelrc('build --config=download_clang') write_to_bazelrc('test --config=download_clang') + # SYCL / ROCm / CUDA are mutually exclusive. + # At most 1 GPU platform can be configured. 
+ gpu_platform_count = 0 + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_ROCM') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_CUDA') == '1': + gpu_platform_count += 1 + if gpu_platform_count >= 2: + raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. ' + 'At most 1 GPU platform can be configured.') + set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) if environ_cp.get('TF_NEED_MPI') == '1': set_mpi_home(environ_cp) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c06fea130f..d5dfb8c813 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -146,7 +146,7 @@ load( "if_static", "tf_cuda_tests_tags", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured") load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library") load( "//third_party/mkl:build_defs.bzl", @@ -2941,7 +2941,7 @@ tf_cuda_library( "platform/device_tracer.h", ], copts = tf_copts(), - cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(), + cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()), visibility = ["//visibility:private"], deps = [ ":core_cpu_internal", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 25063ac823..68fa8fa481 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -55,7 +55,8 @@ load( "if_mkl_ml", "mkl_deps", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured") +load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm", "if_rocm_is_configured") config_setting( # Add "--define tensorflow_xsmm=1" to your build command to use libxsmm for diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl 
index adac895a17..f51a628ca3 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -17,8 +17,15 @@ load( ) load( "@local_config_cuda//cuda:build_defs.bzl", - "cuda_default_copts", "if_cuda", + "if_cuda_is_configured", + "cuda_default_copts", +) +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", + "if_rocm_is_configured", + "rocm_default_copts", ) load( "//third_party/mkl:build_defs.bzl", @@ -860,12 +867,14 @@ def tf_cuda_only_cc_test( srcs = srcs + tf_binary_additional_srcs(), size = size, args = args, - copts = _cuda_copts() + tf_copts(), + copts = _cuda_copts() + _rocm_copts() + tf_copts(), data = data + tf_binary_dynamic_kernel_dsos(kernels), - deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([ - clean_dep("//tensorflow/core:cuda"), - clean_dep("//tensorflow/core:gpu_lib"), - ]), + deps = deps + tf_binary_dynamic_kernel_deps(kernels) + + if_cuda_is_configured([ + clean_dep("//tensorflow/core:cuda"), + clean_dep("//tensorflow/core:gpu_lib")]) + + if_rocm_is_configured([ + clean_dep("//tensorflow/core:gpu_lib")]), linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name), linkstatic = linkstatic or select({ # cc_tests with ".so"s in srcs incorrectly link on Darwin @@ -1000,7 +1009,7 @@ register_extension_info( label_regex_for_dep = "{extension_name}", ) -def _cuda_copts(): +def _cuda_copts(opts = []): """Gets the appropriate set of copts for (maybe) CUDA compilation. If we're doing CUDA compilation, returns copts for our particular CUDA @@ -1016,13 +1025,31 @@ def _cuda_copts(): "@local_config_cuda//cuda:using_clang": ([ "-fcuda-flush-denormals-to-zero", ]), - }) + }) + if_cuda_is_configured(opts) + +def _rocm_copts(opts = []): + """Gets the appropriate set of copts for (maybe) ROCm compilation. + + If we're doing ROCm compilation, returns copts for our particular ROCm + compiler. If we're not doing ROCm compilation, returns an empty list. 
+ + """ + return rocm_default_copts() + select({ + "//conditions:default": [], + "@local_config_rocm//rocm:using_hipcc": ([ + "", + ]) + }) + if_rocm_is_configured(opts) # Build defs for TensorFlow kernels # When this target is built using --config=cuda, a cc_library is built # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional # libraries needed by GPU kernels. +# +# When this target is built using --config=rocm, a cc_library is built +# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional +# libraries needed by GPU kernels. def tf_gpu_kernel_library( srcs, copts = [], @@ -1030,16 +1057,18 @@ def tf_gpu_kernel_library( deps = [], hdrs = [], **kwargs): - copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts() + copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + _rocm_copts(opts = cuda_copts) kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"] native.cc_library( srcs = srcs, hdrs = hdrs, copts = copts, - deps = deps + if_cuda([ + deps = deps + if_cuda_is_configured([ clean_dep("//tensorflow/core:cuda"), clean_dep("//tensorflow/core:gpu_lib"), + ]) + if_rocm_is_configured([ + clean_dep("//tensorflow/core:gpu_lib"), ]), alwayslink = 1, **kwargs @@ -1075,11 +1104,13 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs) kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"] native.cc_library( - deps = deps + if_cuda(cuda_deps + [ + deps = deps + if_cuda_is_configured(cuda_deps + [ clean_dep("//tensorflow/core:cuda"), - "@local_config_cuda//cuda:cuda_headers", + "@local_config_cuda//cuda:cuda_headers" + ]) + if_rocm_is_configured(cuda_deps + [ + "@local_config_rocm//rocm:rocm_headers" ]), - copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) + + copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + 
if_tensorrt(["-DGOOGLE_TENSORRT=1"])), **kwargs @@ -1459,6 +1490,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ "@local_config_cuda//cuda:cuda_headers", "@local_config_cuda//cuda:cudart_static", ] + rocm_deps = [ + clean_dep("//tensorflow/core:stream_executor_headers_lib"), + ] deps = deps + tf_custom_op_library_additional_deps() if gpu_srcs: basename = name.split(".")[0] @@ -1467,13 +1501,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ srcs = gpu_srcs, copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]), features = if_cuda(["-use_header_modules"]), - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps) ) cuda_deps.extend([":" + basename + "_gpu"]) + rocm_deps.extend([":" + basename + "_gpu"]) check_deps( name = name + "_check_deps", - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps), disallowed_deps = [ clean_dep("//tensorflow/core:framework"), clean_dep("//tensorflow/core:lib"), @@ -1482,7 +1517,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ tf_cc_shared_object( name = name, srcs = srcs, - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps), data = if_static([name + "_check_deps"]), copts = tf_copts(is_external = True), features = ["windows_export_all_symbols"], diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm new file mode 100644 index 0000000000..aadaa8bac1 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -0,0 +1,97 @@ +# This Dockerfile provides a starting point for a ROCm installation of +# MIOpen and tensorflow. 
+FROM ubuntu:xenial +MAINTAINER Jeff Poznanovic + +ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/ +ARG ROCM_PATH=/opt/rocm + +ENV DEBIAN_FRONTEND noninteractive +ENV TF_NEED_ROCM 1 +ENV HOME /root/ +RUN apt update && apt install -y wget software-properties-common + +# Add rocm repository +RUN apt-get clean all +RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add - +RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list" + +# Install misc pkgs +RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + clang-3.8 \ + clang-format-3.8 \ + clang-tidy-3.8 \ + cmake \ + cmake-qt-gui \ + ssh \ + curl \ + apt-utils \ + pkg-config \ + g++-multilib \ + git \ + libunwind-dev \ + libfftw3-dev \ + libelf-dev \ + libncurses5-dev \ + libpthread-stubs0-dev \ + vim \ + gfortran \ + libboost-program-options-dev \ + libssl-dev \ + libboost-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + rpm \ + libnuma-dev \ + virtualenv \ + python-pip \ + python3-pip \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install rocm pkgs +RUN apt-get update --allow-insecure-repositories && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ + rocm-dev rocm-libs rocm-utils \ + rocfft miopen-hip miopengemm rocblas hipblas rocrand \ + rocm-profiler cxlactivitylogger && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git +RUN cd ~/HIP && mkdir -p build && cd build && cmake .. 
&& make package -j && dpkg -i *.deb + +ENV HCC_HOME=$ROCM_PATH/hcc +ENV HIP_PATH=$ROCM_PATH/hip +ENV OPENCL_ROOT=$ROCM_PATH/opencl +ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}" +ENV PATH="$ROCM_PATH/bin:${PATH}" +ENV PATH="$OPENCL_ROOT/bin:${PATH}" + +# Add target file to help determine which device(s) to build for +RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst + +# Setup environment variables, and add those environment variables at the end of ~/.bashrc +ARG HCC_HOME=/opt/rocm/hcc +ARG HIP_PATH=/opt/rocm/hip +ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH + +# Copy and run the install scripts. +COPY install/*.sh /install/ +ARG DEBIAN_FRONTEND=noninteractive +RUN /install/install_bootstrap_deb_packages.sh +RUN add-apt-repository -y ppa:openjdk-r/ppa && \ + add-apt-repository -y ppa:george-edison55/cmake-3.x +RUN /install/install_deb_packages.sh +RUN /install/install_pip_packages.sh +RUN /install/install_bazel.sh +RUN /install/install_golang.sh + +# Set up the master bazelrc configuration file. +COPY install/.bazelrc /etc/bazel.bazelrc + +# Configure the build for our ROCm configuration. 
+ENV TF_NEED_ROCM 1 + diff --git a/tensorflow/tools/ci_build/builds/docker_test.sh b/tensorflow/tools/ci_build/builds/docker_test.sh index e337ea4b05..38891b60e5 100755 --- a/tensorflow/tools/ci_build/builds/docker_test.sh +++ b/tensorflow/tools/ci_build/builds/docker_test.sh @@ -19,7 +19,7 @@ # # Usage: docker_test.sh # Arguments: -# IMAGE_TYPE : Type of the image: (CPU|GPU) +# IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM) # TAG : Docker image tag # WHL_PATH : Path to the whl file to be installed inside the docker image # @@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then DOCKERFILE="tensorflow/tools/docker/Dockerfile" elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu" +elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then + DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm" else die "Unrecognized image type: $1" fi @@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') GPU_EXTRA_PARAMS="${devices} ${libs}" +elif [ "${IMAGE_TYPE}" == "rocm" ]; then + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" else GPU_EXTRA_PARAMS="" + ROCM_EXTRA_PARAMS="" fi # Run docker image with source directory mapped docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \ -${GPU_EXTRA_PARAMS} \ +${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \ "${DOCKER_IMG_TAG}" \ /bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\ "tensorflow/tools/ci_build/builds/test_tutorials.sh && "\ diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index fef121ab5a..6543779022 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}" PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" GPU_FLAG="" if [[ ${CONTAINER_TYPE} == 
"cpu" ]] || \ + [[ ${CONTAINER_TYPE} == "rocm" ]] || \ [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \ die "Build failed." @@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then die "ERROR: Cannot find repaired wheel." fi # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so - elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then + elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \ + [[ ${CONTAINER_TYPE} == "rocm" ]]; then WHL_PATH=${AUDITED_WHL_NAME} cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH} echo "Copied manylinx1 wheel file at ${WHL_PATH}" diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index b216e3549f..1cc5aed15d 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ usermod -a -G sudo "${CI_BUILD_USER}" echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo +if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then + # ROCm requires the video group in order to use the GPU for compute. If it + # exists on the host, add it to the container. + getent group video || addgroup video && adduser "${CI_BUILD_USER}" video +fi + if [ -e /root/.bazelrc ]; then cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc" chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc" diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 77265e0f50..eab0616513 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -18,7 +18,7 @@ # # # CONTAINER_TYPE: Type of the docker container used the run the build: -# e.g., (cpu | gpu | android | tensorboard) +# e.g., (cpu | gpu | rocm | android | tensorboard) # # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. 
# If this optional value is not supplied (via the @@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then GPU_EXTRA_PARAMS="" fi +# Add extra params for rocm devices and libraries for ROCm container. +if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" +else + ROCM_EXTRA_PARAMS="" +fi + + # Determine the docker image name DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}" @@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ + ${ROCM_EXTRA_PARAMS} \ ${CI_DOCKER_EXTRA_PARAMS[@]} \ "${DOCKER_IMG_NAME}" \ ${CI_COMMAND_PREFIX[@]} \ diff --git a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh index 8eeddcdb82..3b5c92d148 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' # Only running cc tests, python version does not matter. export PYTHON_BIN_PATH=`which python` diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh index 8eca1987f0..52eff6330f 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python2` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh index f6fa9251d4..d12027599a 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh @@ -26,6 +26,7 @@ echo "" # Run configure. 
export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python3` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh index 51eb2cd7e6..7c531a4d68 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python3` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/libtensorflow.sh b/tensorflow/tools/ci_build/linux/libtensorflow.sh index beef8e063b..3b6e15feb9 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow.sh @@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-" if [ "${TF_NEED_CUDA}" == "1" ]; then SUFFIX="-gpu-linux-" fi +if [ "${TF_NEED_ROCM}" == "1" ]; then + SUFFIX="-rocm-linux-" +fi build_libtensorflow_tarball "${SUFFIX}$(uname -m)" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh index 4bf34dd299..b76262b6e9 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh @@ -19,4 +19,5 @@ set -ex SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 "${SCRIPT_DIR}/libtensorflow_docker.sh" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index 60c974c36b..467b8dc808 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then DOCKER_BINARY="nvidia-docker" DOCKER_FILE="Dockerfile.gpu" fi +if [ "${TF_NEED_ROCM}" == "1" ]; then + 
DOCKER_IMAGE="tf-tensorflow-rocm" + DOCKER_BINARY="docker" + DOCKER_FILE="Dockerfile.rocm" +fi docker build \ -t "${DOCKER_IMAGE}" \ @@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_HDFS=0" \ -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \ -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \ + -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \ -e "TF_NEED_OPENCL_SYCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh new file mode 100755 index 0000000000..c1ebbe3630 --- /dev/null +++ b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Script to build a binary release of libtensorflow with ROCm GPU support. + +set -ex +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +export TF_NEED_ROCM=1 +"${SCRIPT_DIR}/libtensorflow_docker.sh" diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh new file mode 100755 index 0000000000..200089f90e --- /dev/null +++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` +export CC_OPT_FLAGS='-mavx' + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py + +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ + --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \ + //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh new file mode 100755 index 0000000000..1d0b838c1b --- /dev/null +++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` +export CC_OPT_FLAGS='-mavx' + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py + +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ + --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \ + //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh index c7cc16e669..adee0d3171 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh @@ -27,6 +27,7 @@ echo "" # Run configure. 
export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index 9ae5fc6bea..06798adc03 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh" export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index d95fcdeb85..95f1992d7d 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -27,6 +27,7 @@ export TF_NEED_CUDA=1 export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}" export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 +export TF_NEED_ROCM=0 export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh new file mode 100755 index 0000000000..aeabc0e39e --- /dev/null +++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Script to produce binary release of libtensorflow (C API, Java jars etc.). + +set -ex +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# See comments at the top of this file for details. +source "${SCRIPT_DIR}/../builds/libtensorflow.sh" + +# Configure script +export TF_NEED_ROCM=1 +export PYTHON_BIN_PATH="/usr/bin/python" +export TF_NEED_GCP=0 +export TF_NEED_HDFS=0 +export TF_NEED_CUDA=0 +export TF_NEED_OPENCL_SYCL=0 +export TF_NEED_MKL=0 +export COMPUTECPP_PATH="/usr/local" + +export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" +build_libtensorflow_tarball "-gpu-darwin-$(uname -m)" diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh new file mode 100755 index 0000000000..a0de128020 --- /dev/null +++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py +echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc + +bazel clean +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \ + --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 \ + --config=xla -- \ + //tensorflow/compiler/... diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 1e7c5d6790..87d1243563 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -1,6 +1,7 @@ # TensorFlow external dependencies that can be loaded in WORKSPACE files. 
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") +load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure") load("//third_party:nccl/nccl_configure.bzl", "nccl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") @@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): sycl_configure(name = "local_config_sycl") syslibs_configure(name = "local_config_syslibs") python_configure(name = "local_config_python") + rocm_configure(name="local_config_rocm") initialize_third_party() diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl new file mode 100644 index 0000000000..0e175b3ef6 --- /dev/null +++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl @@ -0,0 +1,158 @@ +major_version: "local" +minor_version: "" +default_target_cpu: "same_as_host" + +default_toolchain { + cpu: "k8" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "piii" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "arm" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "ppc" + toolchain_identifier: "local_linux" +} + +toolchain { + abi_version: "local" + abi_libc_version: "local" + builtin_sysroot: "" + compiler: "compiler" + host_system_name: "local" + needsPic: true + supports_gold_linker: false + supports_incremental_linker: false + supports_fission: false + supports_interface_shared_objects: false + supports_normalizing_ar: false + supports_start_end_lib: false + supports_thin_archives: false + target_libc: "local" + target_cpu: "local" + target_system_name: "local" + toolchain_identifier: "local_linux" + + tool_path { name: "ar" path: "/usr/bin/ar" } + tool_path { name: "compat-ld" path: "/usr/bin/ld" } + tool_path { name: "cpp" path: "/usr/bin/cpp" } + tool_path { name: "dwp" path: "/usr/bin/dwp" } + # As part of the TensorFlow release, we place some 
ROCm-related compilation + # files in @local_config_rocm//crosstool/clang/bin, and this relative + # path, combined with the rest of our Bazel configuration causes our + # compilation to use those files. + tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" } + # Use "-std=c++11" for hipcc. For consistency, force both the host compiler + # and the device compiler to use "-std=c++11". + cxx_flag: "-std=c++11" + linker_flag: "-Wl,-no-as-needed" + linker_flag: "-lstdc++" + #linker_flag: "-B/usr/bin/" + linker_flag: "-B/opt/rocm/hcc/compiler/bin" + +%{host_compiler_includes} + tool_path { name: "gcov" path: "/usr/bin/gcov" } + + # C(++) compiles invoke the compiler (as that is the one knowing where + # to find libraries), but we provide LD so other rules can invoke the linker. + tool_path { name: "ld" path: "/usr/bin/ld" } + + tool_path { name: "nm" path: "/usr/bin/nm" } + tool_path { name: "objcopy" path: "/usr/bin/objcopy" } + objcopy_embed_flag: "-I" + objcopy_embed_flag: "binary" + tool_path { name: "objdump" path: "/usr/bin/objdump" } + tool_path { name: "strip" path: "/usr/bin/strip" } + + # Anticipated future default. + unfiltered_cxx_flag: "-no-canonical-prefixes" + + # Make C++ compilation deterministic. Use linkstamping instead of these + # compiler symbols. + unfiltered_cxx_flag: "-Wno-builtin-macro-redefined" + unfiltered_cxx_flag: "-D__DATE__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIME__=\"redacted\"" + unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__" + # The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers + # It needs to be always set when compiling Eigen headers + # (irrespective of whether the source file is being compiled via HIPCC) + # so adding -DEIGEN_USE_HIP as a default CXX flag here + unfiltered_cxx_flag: "-DEIGEN_USE_HIP" + + + # Security hardening on by default. + # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases. 
+ # We need to undef it before redefining it as some distributions now have + # it enabled by default. + #compiler_flag: "-U_FORTIFY_SOURCE" + #compiler_flag: "-D_FORTIFY_SOURCE=1" + #compiler_flag: "-fstack-protector" + #compiler_flag: "-fPIE" + #linker_flag: "-pie" + #linker_flag: "-Wl,-z,relro,-z,now" + + # Enable coloring even if there's no attached terminal. Bazel removes the + # escape sequences if --nocolor is specified. This isn't supported by gcc + # on Ubuntu 14.04. + # compiler_flag: "-fcolor-diagnostics" + + # All warnings are enabled. Maybe enable -Werror as well? + compiler_flag: "-Wall" + # Enable a few more warnings that aren't part of -Wall. + compiler_flag: "-Wunused-but-set-parameter" + # But disable some that are problematic. + compiler_flag: "-Wno-free-nonheap-object" # has false positives + + # Keep stack frames for debugging, even in opt mode. + compiler_flag: "-fno-omit-frame-pointer" + + # Anticipated future default. + linker_flag: "-no-canonical-prefixes" + unfiltered_cxx_flag: "-fno-canonical-system-headers" + # Have gcc return the exit code from ld. + linker_flag: "-pass-exit-codes" + # Stamp the binary with a unique identifier. + linker_flag: "-Wl,--build-id=md5" + linker_flag: "-Wl,--hash-style=gnu" + # Gold linker only? Can we enable this by default? + # linker_flag: "-Wl,--warn-execstack" + # linker_flag: "-Wl,--detect-odr-violations" + + # Include directory for ROCm headers. +%{rocm_include_path} + + compilation_mode_flags { + mode: DBG + # Enable debug symbols. + compiler_flag: "-g" + } + compilation_mode_flags { + mode: OPT + + # No debug symbols. + # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or + # even generally? However, that can't happen here, as it requires special + # handling in Bazel. + compiler_flag: "-g0" + + # Conservative choice for -O + # -O3 can increase binary size and even slow down the resulting binaries. + # Profile first and / or use FDO if you need better performance than this. 
+ compiler_flag: "-O2" + + # Disable assertions + compiler_flag: "-DNDEBUG" + + # Removal of unused code and data at link time (can this increase binary size in some cases?). + compiler_flag: "-ffunction-sections" + compiler_flag: "-fdata-sections" + linker_flag: "-Wl,--gc-sections" + } + linking_mode_flags { mode: DYNAMIC } +} diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl new file mode 100755 index 0000000000..824238022b --- /dev/null +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -0,0 +1,241 @@ +#!/usr/bin/env python +"""Crosstool wrapper for compiling ROCm programs. + +SYNOPSIS: + crosstool_wrapper_driver_rocm [options passed in by cc_library() + or cc_binary() rule] + +DESCRIPTION: + This script is expected to be called by the cc_library() or cc_binary() bazel + rules. When the option "-x rocm" is present in the list of arguments passed + to this script, it invokes the hipcc compiler. Most arguments are passed + as is as a string to --compiler-options of hipcc. When "-x rocm" is not + present, this wrapper invokes gcc with the input arguments as is. +""" + +from __future__ import print_function + +__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)' + +from argparse import ArgumentParser +import os +import subprocess +import re +import sys +import pipes + +# Template values set by rocm_configure.bzl. +CPU_COMPILER = ('%{cpu_compiler}') +GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}') + +HIPCC_PATH = '%{hipcc_path}' +PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) + +def Log(s): + print('gpus/crosstool: {0}'.format(s)) + + +def GetOptionValue(argv, option): + """Extract the list of values for option from the argv list. + + Args: + argv: A list of strings, possibly the argv passed to main(). + option: The option whose value to extract, without the leading '-'. 
+ + Returns: + A list of values, either directly following the option, + (eg., -opt val1 val2) or values collected from multiple occurrences of + the option (eg., -opt val1 -opt val2). + """ + + parser = ArgumentParser() + parser.add_argument('-' + option, nargs='*', action='append') + args, _ = parser.parse_known_args(argv) + if not args or not vars(args)[option]: + return [] + else: + return sum(vars(args)[option], []) + + +def GetHostCompilerOptions(argv): + """Collect the -isystem, -iquote, and --sysroot option values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be used as the --compiler-options to hipcc. + """ + + parser = ArgumentParser() + parser.add_argument('-isystem', nargs='*', action='append') + parser.add_argument('-iquote', nargs='*', action='append') + parser.add_argument('--sysroot', nargs=1) + parser.add_argument('-g', nargs='*', action='append') + parser.add_argument('-fno-canonical-system-headers', action='store_true') + + args, _ = parser.parse_known_args(argv) + + opts = '' + + if args.isystem: + opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, [])) + if args.iquote: + opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, [])) + if args.g: + opts += ' -g' + ' -g'.join(sum(args.g, [])) + #if args.fno_canonical_system_headers: + # opts += ' -fno-canonical-system-headers' + if args.sysroot: + opts += ' --sysroot ' + args.sysroot[0] + + return opts + +def GetHipccOptions(argv): + """Collect the -hipcc_options values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be passed directly to hipcc. 
+ """ + + parser = ArgumentParser() + parser.add_argument('-hipcc_options', nargs='*', action='append') + + args, _ = parser.parse_known_args(argv) + + if args.hipcc_options: + options = _update_options(sum(args.hipcc_options, [])) + return ' '.join(['--'+a for a in options]) + return '' + + +def InvokeHipcc(argv, log=False): + """Call hipcc with arguments assembled from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + log: True if logging is requested. + + Returns: + The return value of calling os.system('hipcc ' + args) + """ + + host_compiler_options = GetHostCompilerOptions(argv) + hipcc_compiler_options = GetHipccOptions(argv) + opt_option = GetOptionValue(argv, 'O') + m_options = GetOptionValue(argv, 'm') + m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) + include_options = GetOptionValue(argv, 'I') + out_file = GetOptionValue(argv, 'o') + depfiles = GetOptionValue(argv, 'MF') + defines = GetOptionValue(argv, 'D') + defines = ''.join([' -D' + define for define in defines]) + undefines = GetOptionValue(argv, 'U') + undefines = ''.join([' -U' + define for define in undefines]) + std_options = GetOptionValue(argv, 'std') + hipcc_allowed_std_options = ["c++11"] + std_options = ''.join([' -std=' + define + for define in std_options if define in hipcc_allowed_std_options]) + + # The list of source files get passed after the -c option. I don't know of + # any other reliable way to just get the list of source files to be compiled. + src_files = GetOptionValue(argv, 'c') + + if len(src_files) == 0: + return 1 + if len(out_file) != 1: + return 1 + + opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0) + else ' -g') + + includes = (' -I ' + ' -I '.join(include_options) + if len(include_options) > 0 + else '') + + # Unfortunately, there are other options that have -c prefix too. + # So allowing only those look like C/C++ files. 
+ src_files = [f for f in src_files if + re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)] + srcs = ' '.join(src_files) + out = ' -o ' + out_file[0] + + hipccopts = ' ' + hipccopts += ' ' + hipcc_compiler_options + hipccopts += undefines + hipccopts += defines + hipccopts += std_options + hipccopts += m_options + + if depfiles: + # Generate the dependency file + depfile = depfiles[0] + cmd = (HIPCC_PATH + ' ' + hipccopts + + host_compiler_options + + ' ' + GCC_HOST_COMPILER_PATH + + ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile) + if log: Log(cmd) + exit_status = os.system(cmd) + if exit_status != 0: + return exit_status + + cmd = (HIPCC_PATH + ' ' + hipccopts + + host_compiler_options + ' -fPIC' + + ' ' + GCC_HOST_COMPILER_PATH + + ' -I .' + opt + includes + ' -c ' + srcs + out) + + # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'. + # Need to investigate and fix. + cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd + if log: Log(cmd) + return os.system(cmd) + + +def main(): + # ignore PWD env var + os.environ['PWD']='' + + parser = ArgumentParser() + parser.add_argument('-x', nargs=1) + parser.add_argument('--rocm_log', action='store_true') + parser.add_argument('-pass-exit-codes', action='store_true') + args, leftover = parser.parse_known_args(sys.argv[1:]) + + if args.x and args.x[0] == 'rocm': + if args.rocm_log: Log('-x rocm') + leftover = [pipes.quote(s) for s in leftover] + if args.rocm_log: Log('using hipcc') + return InvokeHipcc(leftover, log=args.rocm_log) + + # XXX use hipcc to link + if args.pass_exit_codes: + gpu_compiler_flags = [flag for flag in sys.argv[1:] + if not flag.startswith(('-pass-exit-codes'))] + + # special handling for $ORIGIN + # - guard every argument with '' + modified_gpu_compiler_flags = [] + for flag in gpu_compiler_flags: + modified_gpu_compiler_flags.append("'" + flag + "'") + + if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags))) + return 
subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags) + + # Strip our flags before passing through to the CPU compiler for files which + # are not -x rocm. We can't just pass 'leftover' because it also strips -x. + # We not only want to pass -x to the CPU compiler, but also keep it in its + # relative location in the argv list (the compiler is actually sensitive to + # this). + cpu_compiler_flags = [flag for flag in sys.argv[1:] + if not flag.startswith(('--rocm_log'))] + + # XXX: SE codes need to be built with gcc, but need this macro defined + cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__") + + return subprocess.call([CPU_COMPILER] + cpu_compiler_flags) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/third_party/gpus/rocm/BUILD b/third_party/gpus/rocm/BUILD new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl new file mode 100644 index 0000000000..8258bb3589 --- /dev/null +++ b/third_party/gpus/rocm/BUILD.tpl @@ -0,0 +1,99 @@ +licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "using_hipcc", + values = { + "define": "using_rocm_hipcc=true", + }, +) + +cc_library( + name = "rocm_headers", + hdrs = [ + "rocm/rocm_config.h", + %{rocm_headers} + ], + includes = [ + ".", + "rocm/include", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "hip", + srcs = ["rocm/lib/%{hip_lib}"], + data = ["rocm/lib/%{hip_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocblas", + srcs = ["rocm/lib/%{rocblas_lib}"], + data = ["rocm/lib/%{rocblas_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocfft", + srcs = ["rocm/lib/%{rocfft_lib}"], + data = ["rocm/lib/%{rocfft_lib}"], + includes = [ 
+ ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "hiprand", + srcs = ["rocm/lib/%{hiprand_lib}"], + data = ["rocm/lib/%{hiprand_lib}"], + includes = [ + ".", + "rocm/include", + "rocm/include/rocrand", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "miopen", + srcs = ["rocm/lib/%{miopen_lib}"], + data = ["rocm/lib/%{miopen_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocm", + visibility = ["//visibility:public"], + deps = [ + ":rocm_headers", + ":hip", + ":rocblas", + ":rocfft", + ":hiprand", + ":miopen", + ], +) + +%{rocm_include_genrules} diff --git a/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/gpus/rocm/build_defs.bzl.tpl new file mode 100644 index 0000000000..306f57551f --- /dev/null +++ b/third_party/gpus/rocm/build_defs.bzl.tpl @@ -0,0 +1,32 @@ +# Macros for building ROCm code. +def if_rocm(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with ROCm. + + Returns a select statement which evaluates to if_true if we're building + with ROCm enabled. Otherwise, the select statement evaluates to if_false. + + """ + return select({ + "@local_config_rocm//rocm:using_hipcc": if_true, + "//conditions:default": if_false + }) + + +def rocm_default_copts(): + """Default options for all ROCm compilations.""" + return if_rocm(["-x", "rocm"] + %{rocm_extra_copts}) + + +def rocm_is_configured(): + """Returns true if ROCm was enabled during the configure process.""" + return %{rocm_is_configured} + +def if_rocm_is_configured(x): + """Tests if the ROCm was enabled during the configure process. + + Unlike if_rocm(), this does not require that we are building with + --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries. 
+ """ + if rocm_is_configured(): + return x + return [] diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl new file mode 100644 index 0000000000..c5f25a845c --- /dev/null +++ b/third_party/gpus/rocm/rocm_config.h.tpl @@ -0,0 +1,21 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef ROCM_ROCM_CONFIG_H_ +#define ROCM_ROCM_CONFIG_H_ + +#define TF_ROCM_TOOLKIT_PATH "/opt/rocm" + +#endif // ROCM_ROCM_CONFIG_H_ diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl new file mode 100644 index 0000000000..9371e33f97 --- /dev/null +++ b/third_party/gpus/rocm_configure.bzl @@ -0,0 +1,663 @@ +# -*- Python -*- +"""Repository rule for ROCm autoconfiguration. + +`rocm_configure` depends on the following environment variables: + + * `TF_NEED_ROCM`: Whether to enable building with ROCm. + * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path + * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is + `/opt/rocm`. + * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then + use the system default. + * `TF_MIOPEN_VERSION`: The version of the MIOpen library. + * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is + `gfx803,gfx900`. 
+""" + +_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" +_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH" +_TF_ROCM_VERSION = "TF_ROCM_VERSION" +_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION" +_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS" +_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO" + +_DEFAULT_ROCM_VERSION = "" +_DEFAULT_MIOPEN_VERSION = "" +_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm" +_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"] + +def find_cc(repository_ctx): + """Find the C++ compiler.""" + # Return a dummy value for GCC detection here to avoid error + target_cc_name = "gcc" + cc_path_envvar = _GCC_HOST_COMPILER_PATH + cc_name = target_cc_name + + if cc_path_envvar in repository_ctx.os.environ: + cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip() + if cc_name_from_env: + cc_name = cc_name_from_env + if cc_name.startswith("/"): + # Absolute path, maybe we should make this supported by our which function. + return cc_name + cc = repository_ctx.which(cc_name) + if cc == None: + fail(("Cannot find {}, either correct your path or set the {}" + + " environment variable").format(target_cc_name, cc_path_envvar)) + return cc + +_INC_DIR_MARKER_BEGIN = "#include <...>" + +def _cxx_inc_convert(path): + """Convert path returned by cc -E xc++ in a complete path.""" + path = path.strip() + return path + +def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp): + """Compute the list of default C or C++ include directories.""" + if lang_is_cpp: + lang = "c++" + else: + lang = "c" + # TODO: We pass -no-canonical-prefixes here to match the compiler flags, + # but in rocm_clang CROSSTOOL file that is a `feature` and we should + # handle the case when it's disabled and no flag is passed + result = repository_ctx.execute([cc, "-no-canonical-prefixes", + "-E", "-x" + lang, "-", "-v"]) + index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN) + if index1 == -1: + return [] + index1 = result.stderr.find("\n", index1) + if index1 == -1: + return [] + index2 = 
result.stderr.rfind("\n ") + if index2 == -1 or index2 < index1: + return [] + index2 = result.stderr.find("\n", index2 + 1) + if index2 == -1: + inc_dirs = result.stderr[index1 + 1:] + else: + inc_dirs = result.stderr[index1 + 1:index2].strip() + + return [str(repository_ctx.path(_cxx_inc_convert(p))) + for p in inc_dirs.split("\n")] + +def get_cxx_inc_directories(repository_ctx, cc): + """Compute the list of default C and C++ include directories.""" + # For some reason `clang -xc` sometimes returns include paths that are + # different from the ones from `clang -xc++`. (Symlink and a dir) + # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists + includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True) + includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False) + + includes_cpp_set = depset(includes_cpp) + return includes_cpp + [inc for inc in includes_c + if inc not in includes_cpp_set] + +def auto_configure_fail(msg): + """Output failure message when rocm configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("\n%sROCm Configuration Error:%s %s\n" % (red, no_color, msg)) +# END cc_configure common functions (see TODO above). + +def _host_compiler_includes(repository_ctx, cc): + """Generates the cxx_builtin_include_directory entries for gcc inc dirs. + + Args: + repository_ctx: The repository context. + cc: The path to the gcc host compiler. + + Returns: + A string containing the cxx_builtin_include_directory for each of the gcc + host compiler include directories, which can be added to the CROSSTOOL + file. 
+ """ + inc_dirs = get_cxx_inc_directories(repository_ctx, cc) + + # Add numpy headers + inc_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include") + + entries = [] + for inc_dir in inc_dirs: + entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir) + + # define TENSORFLOW_USE_ROCM + entries.append(" unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"") + + return "\n".join(entries) + +def _rocm_include_path(repository_ctx, rocm_config): + """Generates the cxx_builtin_include_directory entries for rocm inc dirs. + + Args: + repository_ctx: The repository context. + rocm_config: The ROCm config as returned by _get_rocm_config. + + Returns: + A string containing the cxx_builtin_include_directory for each of the + ROCm include directories, which can be added to the CROSSTOOL + file. + """ + inc_dirs = [] + + # general ROCm include path (the only entry derived from rocm_toolkit_path; the others hard-code /opt/rocm) + inc_dirs.append(rocm_config.rocm_toolkit_path + '/include') + + # Add HSA headers + inc_dirs.append("/opt/rocm/hsa/include") + + # Add HIP headers + inc_dirs.append("/opt/rocm/include/hip") + inc_dirs.append("/opt/rocm/include/hip/hcc_detail") + + # Add rocrand and hiprand headers + inc_dirs.append("/opt/rocm/rocrand/include") + inc_dirs.append("/opt/rocm/hiprand/include") + + # Add rocfft headers + inc_dirs.append("/opt/rocm/rocfft/include") + + # Add rocBLAS headers + inc_dirs.append("/opt/rocm/rocblas/include") + + # Add MIOpen headers + inc_dirs.append("/opt/rocm/miopen/include") + + # Add hcc headers + inc_dirs.append("/opt/rocm/hcc/include") + inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/") + inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include") + # Newer hcc builds use/are based off of clang 8.0.0.
+ inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/") + inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include") + + inc_entries = [] + for inc_dir in inc_dirs: + inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir) + return "\n".join(inc_entries) + +def _enable_rocm(repository_ctx): + if "TF_NEED_ROCM" in repository_ctx.os.environ: + enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip() + return enable_rocm == "1" + return False + +def _rocm_toolkit_path(repository_ctx): + """Finds the rocm toolkit directory. + + Args: + repository_ctx: The repository context. + + Returns: + A speculative real path of the rocm toolkit install directory. + """ + rocm_toolkit_path = _DEFAULT_ROCM_TOOLKIT_PATH + if _ROCM_TOOLKIT_PATH in repository_ctx.os.environ: + rocm_toolkit_path = repository_ctx.os.environ[_ROCM_TOOLKIT_PATH].strip() + if not repository_ctx.path(rocm_toolkit_path).exists: + auto_configure_fail("Cannot find rocm toolkit path.") + return str(repository_ctx.path(rocm_toolkit_path).realpath) + +def _amdgpu_targets(repository_ctx): + """Returns a list of strings representing AMDGPU targets.""" + if _TF_ROCM_AMDGPU_TARGETS not in repository_ctx.os.environ: + return _DEFAULT_ROCM_AMDGPU_TARGETS + amdgpu_targets_str = repository_ctx.os.environ[_TF_ROCM_AMDGPU_TARGETS] + amdgpu_targets = amdgpu_targets_str.split(",") + for amdgpu_target in amdgpu_targets: + if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit(): + auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target) + return amdgpu_targets + +def _cpu_value(repository_ctx): + """Returns the name of the host operating system. + + Args: + repository_ctx: The repository context. + + Returns: + A string containing the name of the host operating system. 
+ """ + os_name = repository_ctx.os.name.lower() + if os_name.startswith("mac os"): + return "Darwin" + if os_name.find("windows") != -1: + return "Windows" + result = repository_ctx.execute(["uname", "-s"]) + return result.stdout.strip() + +def _lib_name(lib, cpu_value, version="", static=False): + """Constructs the platform-specific name of a library. + + Args: + lib: The name of the library, such as "hip" + cpu_value: The name of the host operating system. + version: The version of the library. + static: True if the library is static or False if it is a shared object. + + Returns: + The platform-specific name of the library. + """ + if cpu_value == "Linux": + if static: + return "lib%s.a" % lib + else: + if version: + version = ".%s" % version + return "lib%s.so%s" % (lib, version) + elif cpu_value == "Windows": + return "%s.lib" % lib + elif cpu_value == "Darwin": + if static: + return "lib%s.a" % lib + elif version: + version = ".%s" % version + return "lib%s%s.dylib" % (lib, version) + else: + auto_configure_fail("Invalid cpu_value: %s" % cpu_value) + +def _find_rocm_lib(lib, repository_ctx, cpu_value, basedir, version="", + static=False): + """Finds the given ROCm libraries on the system. + + Args: + lib: The name of the library, such as "hip" + repository_ctx: The repository context. + cpu_value: The name of the host operating system. + basedir: The install directory of ROCm. + version: The version of the library. + static: True if static library, False if shared object. + + Returns: + Returns a struct with the following fields: + file_name: The basename of the library found on the system. + path: The full path to the library.
+ """ + file_name = _lib_name(lib, cpu_value, version, static) + if cpu_value == "Linux": + path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path( + "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + + path = repository_ctx.path("%s/lib/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path("%s/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + + auto_configure_fail("Cannot find rocm library %s" % file_name) + +def _find_libs(repository_ctx, rocm_config): + """Returns the ROCm libraries on the system. + + Args: + repository_ctx: The repository context. + rocm_config: The ROCm config as returned by _get_rocm_config + + Returns: + Map of library names to structs of filename and path as returned by + _find_rocm_lib. + """ + cpu_value = rocm_config.cpu_value + return { + "hip": _find_rocm_lib( + "hip_hcc", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path), + "rocblas": _find_rocm_lib( + "rocblas", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocblas"), + "rocfft": _find_rocm_lib( + "rocfft", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocfft"), + "hiprand": _find_rocm_lib( + "hiprand", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/hiprand"), + "miopen": _find_rocm_lib( + "MIOpen", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/miopen"), + } + +def _get_rocm_config(repository_ctx): + """Detects and returns information about the ROCm installation on the system. + + Args: + repository_ctx: The repository context. 
+ + Returns: + A struct containing the following fields: + rocm_toolkit_path: The ROCm toolkit installation directory. + amdgpu_targets: A list of the system's AMDGPU targets. + cpu_value: The name of the host operating system. + """ + cpu_value = _cpu_value(repository_ctx) + rocm_toolkit_path = _rocm_toolkit_path(repository_ctx) + return struct( + rocm_toolkit_path = rocm_toolkit_path, + amdgpu_targets = _amdgpu_targets(repository_ctx), + cpu_value = cpu_value) + +def _tpl(repository_ctx, tpl, substitutions={}, out=None): + if not out: + out = tpl.replace(":", "/") + repository_ctx.template( + out, + Label("//third_party/gpus/%s.tpl" % tpl), + substitutions) + + +def _file(repository_ctx, label): + repository_ctx.template( + label.replace(":", "/"), + Label("//third_party/gpus/%s.tpl" % label), + {}) + + +_DUMMY_CROSSTOOL_BZL_FILE = """ +def error_gpu_disabled(): + fail("ERROR: Building with --config=rocm but TensorFlow is not configured " + + "to build with GPU support. Please re-run ./configure and enter 'Y' " + + "at the prompt to build with GPU support.") + + native.genrule( + name = "error_gen_crosstool", + outs = ["CROSSTOOL"], + cmd = "echo 'Should not be run.' && exit 1", + ) + + native.filegroup( + name = "crosstool", + srcs = [":CROSSTOOL"], + output_licenses = ["unencumbered"], + ) +""" + + +_DUMMY_CROSSTOOL_BUILD_FILE = """ +load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled") + +error_gpu_disabled() +""" + +def _create_dummy_repository(repository_ctx): + cpu_value = _cpu_value(repository_ctx) + + # Set up BUILD file for rocm/. 
+ _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "False", + "%{rocm_extra_copts}": "[]" + }) + _tpl(repository_ctx, "rocm:BUILD", + { + "%{hip_lib}": _lib_name("hip", cpu_value), + "%{rocblas_lib}": _lib_name("rocblas", cpu_value), + "%{miopen_lib}": _lib_name("miopen", cpu_value), + "%{rocfft_lib}": _lib_name("rocfft", cpu_value), + "%{hiprand_lib}": _lib_name("hiprand", cpu_value), + "%{rocm_include_genrules}": '', + "%{rocm_headers}": '', + }) + + # Create dummy files for the ROCm toolkit since they are still required by + # tensorflow/core/platform/default/build_config:rocm. + repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "") + + # Set up rocm_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl(repository_ctx, "rocm:rocm_config.h", + { + "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH, + }, "rocm/rocm/rocm_config.h") + + # If rocm_configure is not configured to build with GPU support, and the user + # attempts to build with --config=rocm, add a dummy build rule to intercept + # this and fail with an actionable error message. + repository_ctx.file("crosstool/error_gpu_disabled.bzl", + _DUMMY_CROSSTOOL_BZL_FILE) + repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) + +def _execute(repository_ctx, cmdline, error_msg=None, error_details=None, + empty_stdout_fine=False): + """Executes an arbitrary shell command. 
+ + Args: + repository_ctx: the repository_ctx object + cmdline: list of strings, the command to execute + error_msg: string, a summary of the error if the command fails + error_details: string, details about the error or steps to fix it + empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise + it's an error + Return: + the result of repository_ctx.execute(cmdline) + """ + result = repository_ctx.execute(cmdline) + if result.stderr or not (empty_stdout_fine or result.stdout): + auto_configure_fail( + "\n".join([ + error_msg.strip() if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else ""])) + return result + +def _norm_path(path): + """Returns a path with '/' and remove the trailing slash.""" + path = path.replace("\\", "/") + if path[-1] == "/": + path = path[:-1] + return path + +def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name, + src_files = [], dest_files = []): + """Returns a genrule to symlink(or copy if on Windows) a set of files. + + If src_dir is passed, files will be read from the given directory; otherwise + we assume files are in src_files and dest_files + """ + if src_dir != None: + src_dir = _norm_path(src_dir) + dest_dir = _norm_path(dest_dir) + files = _read_dir(repository_ctx, src_dir) + # Create a list with the src_dir stripped to use for outputs. + dest_files = files.replace(src_dir, '').splitlines() + src_files = files.splitlines() + command = [] + # We clear folders that might have been generated previously to avoid + # undesired inclusions + command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi') + command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi') + outs = [] + for i in range(len(dest_files)): + if dest_files[i] != "": + # If we have only one file to link we do not want to use the dest_dir, as + # $(@D) will include the full path to the file. 
+ dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i] + # On Windows, symlink is not supported, so we just copy all the files. + cmd = 'ln -s' + command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest)) + outs.append(' "' + dest_dir + dest_files[i] + '",') + genrule = _genrule(src_dir, genrule_name, " && ".join(command), + "\n".join(outs)) + return genrule + +def _genrule(src_dir, genrule_name, command, outs): + """Returns a string with a genrule. + + Genrule executes the given command and produces the given outputs. + """ + return ( + 'genrule(\n' + + ' name = "' + + genrule_name + '",\n' + + ' outs = [\n' + + outs + + '\n ],\n' + + ' cmd = """\n' + + command + + '\n """,\n' + + ')\n' + ) + +def _read_dir(repository_ctx, src_dir): + """Returns a string with all files in a directory. + + Finds all files inside a directory, traversing subfolders and following + symlinks. The returned string contains the full path of all files + separated by line breaks. + """ + find_result = _execute( + repository_ctx, ["find", src_dir, "-follow", "-type", "f"], + empty_stdout_fine=True) + result = find_result.stdout + return result + +def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets): + if False: + amdgpu_target_flags = ["--amdgpu-target=" + + amdgpu_target for amdgpu_target in amdgpu_targets] + else: + # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc" + amdgpu_target_flags = [] + return str(amdgpu_target_flags) + +def _create_local_rocm_repository(repository_ctx): + """Creates the repository containing files set up to build with ROCm.""" + rocm_config = _get_rocm_config(repository_ctx) + + # Set up symbolic links for the rocm toolkit by creating genrules to do + # symlinking. 
We create one genrule for each directory we want to track under + # rocm_toolkit_path + rocm_toolkit_path = rocm_config.rocm_toolkit_path + rocm_include_path = rocm_toolkit_path + "/include" + genrules = [_symlink_genrule_for_dir(repository_ctx, + rocm_include_path, "rocm/include", "rocm-include")] + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/rocfft/include", "rocm/include/rocfft", "rocfft-include")) + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/rocblas/include", "rocm/include/rocblas", "rocblas-include")) + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/miopen/include", "rocm/include/miopen", "miopen-include")) + + rocm_libs = _find_libs(repository_ctx, rocm_config) + rocm_lib_src = [] + rocm_lib_dest = [] + for lib in rocm_libs.values(): + rocm_lib_src.append(lib.path) + rocm_lib_dest.append("rocm/lib/" + lib.file_name) + genrules.append(_symlink_genrule_for_dir(repository_ctx, None, "", "rocm-lib", + rocm_lib_src, rocm_lib_dest)) + + included_files = _read_dir(repository_ctx, rocm_include_path).replace( + rocm_include_path, '').splitlines() + + # Set up BUILD file for rocm/ + _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "True", + "%{rocm_extra_copts}": _compute_rocm_extra_copts( + repository_ctx, rocm_config.amdgpu_targets), + + }) + _tpl(repository_ctx, "rocm:BUILD", + { + "%{hip_lib}": rocm_libs["hip"].file_name, + "%{rocblas_lib}": rocm_libs["rocblas"].file_name, + "%{rocfft_lib}": rocm_libs["rocfft"].file_name, + "%{hiprand_lib}": rocm_libs["hiprand"].file_name, + "%{miopen_lib}": rocm_libs["miopen"].file_name, + "%{rocm_include_genrules}": "\n".join(genrules), + "%{rocm_headers}": ('":rocm-include",\n' + + '":rocfft-include",\n' + + '":rocblas-include",\n' + + '":miopen-include",'), + }) + # Set up crosstool/ + _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"}) + cc = 
find_cc(repository_ctx) + host_compiler_includes = _host_compiler_includes(repository_ctx, cc) + rocm_defines = { + "%{rocm_include_path}": _rocm_include_path(repository_ctx, + rocm_config), + "%{host_compiler_includes}": host_compiler_includes, + "%{clang_path}": str(cc), + } + + _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out="crosstool/CROSSTOOL") + + _tpl(repository_ctx, + "crosstool:clang/bin/crosstool_wrapper_driver_rocm", + { + "%{cpu_compiler}": str(cc), + "%{hipcc_path}": "/opt/rocm/bin/hipcc", + "%{gcc_host_compiler_path}": str(cc), + "%{rocm_amdgpu_targets}": ",".join( + ["\"%s\"" % c for c in rocm_config.amdgpu_targets]), + }) + + # Set up rocm_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl(repository_ctx, "rocm:rocm_config.h", + { + "%{rocm_amdgpu_targets}": ",".join( + ["\"%s\"" % c for c in rocm_config.amdgpu_targets]), + "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path, + }, "rocm/rocm/rocm_config.h") + + +def _create_remote_rocm_repository(repository_ctx, remote_config_repo): + """Creates pointers to a remotely configured repo set up to build with ROCm.""" + _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "True", + "%{rocm_extra_copts}": _compute_rocm_extra_copts( + repository_ctx, #_compute_capabilities(repository_ctx) + ), + + }) + _tpl(repository_ctx, "rocm:remote.BUILD", + { + "%{remote_rocm_repo}": remote_config_repo, + }, "rocm/BUILD") + _tpl(repository_ctx, "crosstool:remote.BUILD", { + "%{remote_rocm_repo}": remote_config_repo, + }, "crosstool/BUILD") + +def _rocm_autoconf_impl(repository_ctx): + """Implementation of the rocm_autoconf repository rule.""" + if not _enable_rocm(repository_ctx): + _create_dummy_repository(repository_ctx) + else: + if _TF_ROCM_CONFIG_REPO in repository_ctx.os.environ: + _create_remote_rocm_repository(repository_ctx, + repository_ctx.os.environ[_TF_ROCM_CONFIG_REPO]) + else: + _create_local_rocm_repository(repository_ctx) + + 
+rocm_configure = repository_rule( + implementation = _rocm_autoconf_impl, + environ = [ + _GCC_HOST_COMPILER_PATH, + "TF_NEED_ROCM", + _ROCM_TOOLKIT_PATH, + _TF_ROCM_VERSION, + _TF_MIOPEN_VERSION, + _TF_ROCM_AMDGPU_TARGETS, + _TF_ROCM_CONFIG_REPO, + ], +) + +"""Detects and configures the local ROCm toolchain. + +A real ROCm repository is only set up when TF_NEED_ROCM is "1"; otherwise a +dummy repository is generated. + +Add the following to your WORKSPACE file: + +```python +rocm_configure(name = "local_config_rocm") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/tools/bazel.rc b/tools/bazel.rc index 601e07ffdd..afc5cf56ab 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -42,6 +42,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld' build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true +build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain +build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true + build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true -- cgit v1.2.3