diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc | 24 |
1 files changed, 19 insertions, 5 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 2e7765c4c6..b24fe417ff 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -71,7 +71,18 @@ const int kDefaultInlineThreshold = 1100; // Gets the libdevice filename for a particular compute capability. When // presented with a GPU we don't recognize, we just return the libdevice from // compute_20. -static string GetLibdeviceFilename(std::pair<int, int> compute_capability) { +static string GetLibdeviceFilename(const string& libdevice_dir_path, + std::pair<int, int> compute_capability) { + // Since CUDA 9.0, all GPU versions are included in a single file + const char* unified_libdevice_filename = "libdevice.10.bc"; + std::vector<string> unified_libdevice_files; + const tensorflow::Status status = + tensorflow::Env::Default()->GetMatchingPaths( + tensorflow::io::JoinPath(libdevice_dir_path, unified_libdevice_filename), + &unified_libdevice_files); + if (status.ok() && unified_libdevice_files.size() == 1) { + return unified_libdevice_filename; + } // There are only four libdevice files: compute_{20,30,35,50}. Each GPU // version gets mapped to one of these. Note in particular that sm_60 and // sm_61 map to libdevice.compute_30. @@ -101,7 +112,7 @@ static string GetLibdeviceFilename(std::pair<int, int> compute_capability) { } // Gets the GPU name as it's known to LLVM for a given compute capability. If -// we see an unrecognized compute capability, we return "sm_20". +// we see an unrecognized compute capability, we return "sm_30". static string GetSmName(std::pair<int, int> compute_capability) { static auto* m = new std::map<std::pair<int, int>, int>({{{2, 0}, 20}, {{2, 1}, 21}, @@ -114,8 +125,10 @@ static string GetSmName(std::pair<int, int> compute_capability) { {{5, 3}, 53}, {{6, 0}, 60}, {{6, 1}, 61}, - {{6, 2}, 62}}); - int sm_version = 20; + {{6, 2}, 62}, + // TODO: Change this to 70 once LLVM NVPTX supports it + {{7, 0}, 60}}); + int sm_version = 30; auto it = m->find(compute_capability); if (it != m->end()) { sm_version = it->second; @@ -306,7 +319,8 @@ tensorflow::Status LinkLibdeviceIfNecessary( llvm::Linker linker(*module); string libdevice_path = tensorflow::io::JoinPath( - libdevice_dir_path, GetLibdeviceFilename(compute_capability)); + libdevice_dir_path, GetLibdeviceFilename(libdevice_dir_path, + compute_capability)); TF_RETURN_IF_ERROR(tensorflow::Env::Default()->FileExists(libdevice_path)); VLOG(1) << "Linking with libdevice from: " << libdevice_path; std::unique_ptr<llvm::Module> libdevice_module = |