diff options
author | Justin Lebar <jlebar@google.com> | 2017-10-16 17:22:26 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-10-16 17:26:46 -0700 |
commit | 684f88fa7e61721c3264dc70abeed2b3e6fa7717 (patch) | |
tree | 7531a10eb6980e4b7840afc43aec250bf144b60b | |
parent | 99dffc958a1cfa4e5a2f81e8f4085277a0c34bd9 (diff) |
[XLA:GPU] Don't crash with --vmodule=gpu_compiler=2 if we can't run ptxas.
At --vmodule=gpu_compiler=2, we run ptxas over our generated PTX, to
validate it, and also to dump out stats like the number of registers
used.
But previously, this would fail if your GPU was anything other than
sm_35 (i.e. K20/40/80), because we didn't pass down cc_major/cc_minor to
ptxas. And moreover, if ptxas failed to compile your program, we'd
LOG(FATAL), which is probably not what you want.
This change fixes both those issues. Tested on my local GTX1080.
PiperOrigin-RevId: 172403304
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 20 |
1 file changed, 14 insertions, 6 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 57f11db11f..3e16e4e3c4 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -67,6 +67,7 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cuda_libdevice_path.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" @@ -84,6 +85,8 @@ namespace gpu { namespace { +using tensorflow::strings::StrCat; + // Any address of a variable residing in global memory or returned by one of the // memory allocation routines from the driver or runtime API is always aligned // to at least 256 bytes. @@ -223,7 +226,7 @@ tensorflow::Status PrepareHloModuleForIrEmitting( } // Invokes the ptxas tool on the given PTX string, and dumps its output. -void DumpPtxasInfo(const string& ptx) { +void DumpPtxasInfo(const string& ptx, int cc_major, int cc_minor) { const string ptxas_path = tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin/ptxas"); // Do not log PTX stats if ptxas is not found at the given path. @@ -245,17 +248,22 @@ void DumpPtxasInfo(const string& ptx) { // Invoke ptxas and collect its output. 
tensorflow::SubProcess ptxas_info_dumper; - ptxas_info_dumper.SetProgram(ptxas_path, {ptxas_path, ptx_path, "-o", - "/dev/null", "-v", "-arch=sm_35"}); + ptxas_info_dumper.SetProgram(ptxas_path, + {ptxas_path, ptx_path, "-o", "/dev/null", "-v", + StrCat("-arch=sm_", cc_major, cc_minor)}); ptxas_info_dumper.SetChannelAction(tensorflow::CHAN_STDERR, tensorflow::ACTION_PIPE); - CHECK(ptxas_info_dumper.Start()); + if (!ptxas_info_dumper.Start()) { + LOG(ERROR) << "Failed to launch ptxas."; + return; + } string stderr_output; int exit_status = ptxas_info_dumper.Communicate( /*stdin_input=*/nullptr, /*stdout_output=*/nullptr, &stderr_output); XLA_LOG_LINES(tensorflow::INFO, stderr_output); if (exit_status != 0) { - LOG(FATAL) << "Invalid PTX. See the error message above for reasons."; + LOG(ERROR) << "ptxas exited with non-zero error code " << exit_status + << "."; } } @@ -387,7 +395,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile( VLOG(2) << "PTX:"; XLA_VLOG_LINES(2, *ptx); if (VLOG_IS_ON(2)) { - DumpPtxasInfo(*ptx); + DumpPtxasInfo(*ptx, cc_major, cc_minor); } auto thunk_schedule = MakeUnique<ThunkSchedule>( |