author    Justin Lebar <jlebar@google.com>  2017-02-19 12:33:44 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-02-19 12:53:26 -0800
commit    7b02fa6a27022275517ed5b851b06ba19a11bdf0 (patch)
tree      bcb4be30b332a11f57dc6677196307fe5f6d981e /tensorflow/compiler/xla/legacy_flags
parent    2c3469018589ffece9938797f618e5b3228074fa (diff)
[XLA] Read GPU architecture from StreamExecutor, rather than a flag.
Previously, we read the GPU architecture from the --gpu_architecture flag, which defaulted to compute_35. We'd then use this value when choosing a libdevice file and when telling LLVM which GPU architecture we're compiling for.

Now we read this value from the StreamExecutor for the device on which we're going to run our computation. This change also adds more supported GPU architectures to the GPU backend, so we choose the right libdevice for your GPU.

This change is necessary before we can begin emitting fp16 arithmetic and other sm_60+ ops.

Change: 147971326
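The diff below only removes the flag plumbing; the selection logic itself lives elsewhere in the GPU backend. As a rough sketch of the libdevice-selection idea described above (not the actual XLA code; the struct, function name, and version cutoffs here are hypothetical illustrations), the backend can map the compute capability reported by the device to the matching libdevice bitcode file instead of hard-coding compute_35:

#include <string>

// Hypothetical stand-in for the value the real code reads from
// StreamExecutor's device description instead of --gpu_architecture.
struct ComputeCapability {
  int major;
  int minor;
};

// Sketch: pre-CUDA-9 toolkits ship several libdevice bitcode variants
// (compute_20/30/35/50). Pick the newest variant the device satisfies;
// the exact cutoffs here are illustrative, not XLA's actual table.
std::string ChooseLibdeviceFile(const ComputeCapability& cc) {
  const int version = cc.major * 10 + cc.minor;  // e.g. sm_60 -> 60
  const char* variant = "compute_20";            // conservative fallback
  if (version >= 50) {
    variant = "compute_50";
  } else if (version >= 35) {
    variant = "compute_35";
  } else if (version >= 30) {
    variant = "compute_30";
  }
  return std::string("libdevice.") + variant + ".10.bc";
}

Keying this off the device rather than a flag means a machine with an sm_60 GPU picks up the compute_50 libdevice automatically, which is what makes the later fp16/sm_60+ work possible.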
Diffstat (limited to 'tensorflow/compiler/xla/legacy_flags')
-rw-r--r--  tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc  3
-rw-r--r--  tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h   1
2 files changed, 0 insertions, 4 deletions
diff --git a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
index c355b1ed9b..f8f6ea26b1 100644
--- a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
+++ b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
@@ -38,7 +38,6 @@ static void AllocateFlags() {
   flags->dump_temp_products_to = "";
   flags->ftz = false;
   flags->fma = true;
-  flags->gpu_architecture = "compute_35";
   flags->verbose_ptx_asm = false;
   flags->kernel = "";
   flags->llvm_dump_passes = false;
@@ -51,8 +50,6 @@ static void AllocateFlags() {
"If empty, no dump is produced"),
tensorflow::Flag("ftz", &flags->ftz, "flush to zero semantics"),
tensorflow::Flag("fma", &flags->fma, "use FMA synthesis"),
- tensorflow::Flag("gpu_architecture", &flags->gpu_architecture,
- "GPU architecture"),
tensorflow::Flag("verbose_ptx_asm", &flags->verbose_ptx_asm,
"emit PTX assembly with extra comments"),
tensorflow::Flag("kernel", &flags->kernel,
diff --git a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
index fbb8863454..31cb50e9da 100644
--- a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
+++ b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
@@ -36,7 +36,6 @@ typedef struct {
   string dump_temp_products_to;  // temporary compilation products dir
   bool ftz;                      // flush to zero semantics
   bool fma;                      // use FMA synthesis
-  string gpu_architecture;       // GPU architecture
   bool verbose_ptx_asm;          // emit PTX assembly with extra comments
   string kernel;                 // only emit the IR and PTX for this kernel
   bool llvm_dump_passes;         // dump the passes LLVM runs to stderr