author    Justin Lebar <jlebar@google.com>  2017-02-19 12:33:44 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-02-19 12:53:26 -0800
commit    7b02fa6a27022275517ed5b851b06ba19a11bdf0 (patch)
tree      bcb4be30b332a11f57dc6677196307fe5f6d981e /tensorflow/compiler/xla/legacy_flags
parent    2c3469018589ffece9938797f618e5b3228074fa (diff)
[XLA] Read GPU architecture from StreamExecutor, rather than a flag.
Previously, we read the GPU architecture from the --gpu_architecture flag, which defaulted to compute_35. We'd then use this value when choosing a libdevice file and when telling LLVM which GPU architecture we're compiling for.

Now we read this value from the StreamExecutor for the device on which we're going to run our computation. This change also adds more supported GPU architectures to the GPU backend, so we choose the right libdevice for your GPU.

This change is necessary before we can begin emitting fp16 arithmetic and other sm_60+ ops.

Change: 147971326
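The diff below only removes the flag plumbing; the selection logic itself lives elsewhere in the GPU backend. As a rough sketch of the libdevice-selection idea described above (not the actual XLA code; the struct, function name, and version cutoffs here are hypothetical illustrations), the backend can map the compute capability reported by the device to the matching libdevice bitcode file instead of hard-coding compute_35:

#include <string>

// Hypothetical stand-in for the value the real code reads from
// StreamExecutor's device description instead of --gpu_architecture.
struct ComputeCapability {
  int major;
  int minor;
};

// Sketch: pre-CUDA-9 toolkits ship several libdevice bitcode variants
// (compute_20/30/35/50). Pick the newest variant the device satisfies;
// the exact cutoffs here are illustrative, not XLA's actual table.
std::string ChooseLibdeviceFile(const ComputeCapability& cc) {
  const int version = cc.major * 10 + cc.minor;  // e.g. sm_60 -> 60
  const char* variant = "compute_20";            // conservative fallback
  if (version >= 50) {
    variant = "compute_50";
  } else if (version >= 35) {
    variant = "compute_35";
  } else if (version >= 30) {
    variant = "compute_30";
  }
  return std::string("libdevice.") + variant + ".10.bc";
}

Keying this off the device rather than a flag means a machine with an sm_60 GPU picks up the compute_50 libdevice automatically, which is what makes the later fp16/sm_60+ work possible.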
Diffstat (limited to 'tensorflow/compiler/xla/legacy_flags')
-rw-r--r--  tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc  3
-rw-r--r--  tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h   1
2 files changed, 0 insertions, 4 deletions
diff --git a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
index c355b1ed9b..f8f6ea26b1 100644
--- a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
+++ b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
@@ -38,7 +38,6 @@ static void AllocateFlags() {
   flags->dump_temp_products_to = "";
   flags->ftz = false;
   flags->fma = true;
-  flags->gpu_architecture = "compute_35";
   flags->verbose_ptx_asm = false;
   flags->kernel = "";
   flags->llvm_dump_passes = false;
@@ -51,8 +50,6 @@ static void AllocateFlags() {
"If empty, no dump is produced"),
tensorflow::Flag("ftz", &flags->ftz, "flush to zero semantics"),
tensorflow::Flag("fma", &flags->fma, "use FMA synthesis"),
- tensorflow::Flag("gpu_architecture", &flags->gpu_architecture,
- "GPU architecture"),
tensorflow::Flag("verbose_ptx_asm", &flags->verbose_ptx_asm,
"emit PTX assembly with extra comments"),
tensorflow::Flag("kernel", &flags->kernel,
diff --git a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
index fbb8863454..31cb50e9da 100644
--- a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
+++ b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
@@ -36,7 +36,6 @@ typedef struct {
   string dump_temp_products_to;  // temporary compilation products dir
   bool ftz;                      // flush to zero semantics
   bool fma;                      // use FMA synthesis
-  string gpu_architecture;       // GPU architecture
   bool verbose_ptx_asm;          // emit PTX assembly with extra comments
   string kernel;                 // only emit the IR and PTX for this kernel
   bool llvm_dump_passes;         // dump the passes LLVM runs to stderr