diff options

| field | value |
|---|---|
| author | 2017-02-19 12:33:44 -0800 |
| committer | 2017-02-19 12:53:26 -0800 |
| commit | 7b02fa6a27022275517ed5b851b06ba19a11bdf0 (patch) |
| tree | bcb4be30b332a11f57dc6677196307fe5f6d981e /tensorflow/compiler/xla/legacy_flags |
| parent | 2c3469018589ffece9938797f618e5b3228074fa (diff) |

[XLA] Read GPU architecture from StreamExecutor, rather than a flag.
Previously, we read the GPU architecture from the --gpu_architecture
flag, which defaulted to compute_35. We'd then use this value when
choosing a libdevice file and when telling LLVM which GPU architecture
we're compiling for.
Now we read this value from the StreamExecutor for the device on which
we're going to run our computation.
This change also adds more supported GPU architectures to the GPU
backend, so we choose the right libdevice for your GPU.
This change is necessary before we can begin emitting fp16 arithmetic
and other sm_60+ ops.
Change: 147971326
Diffstat (limited to 'tensorflow/compiler/xla/legacy_flags')
-rw-r--r-- | tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc | 3 | ||||
-rw-r--r-- | tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h | 1 |
2 files changed, 0 insertions, 4 deletions
```diff
diff --git a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
index c355b1ed9b..f8f6ea26b1 100644
--- a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
+++ b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.cc
@@ -38,7 +38,6 @@ static void AllocateFlags() {
   flags->dump_temp_products_to = "";
   flags->ftz = false;
   flags->fma = true;
-  flags->gpu_architecture = "compute_35";
   flags->verbose_ptx_asm = false;
   flags->kernel = "";
   flags->llvm_dump_passes = false;
@@ -51,8 +50,6 @@ static void AllocateFlags() {
                         "If empty, no dump is produced"),
        tensorflow::Flag("ftz", &flags->ftz, "flush to zero semantics"),
        tensorflow::Flag("fma", &flags->fma, "use FMA synthesis"),
-       tensorflow::Flag("gpu_architecture", &flags->gpu_architecture,
-                        "GPU architecture"),
        tensorflow::Flag("verbose_ptx_asm", &flags->verbose_ptx_asm,
                         "emit PTX assembly with extra comments"),
        tensorflow::Flag("kernel", &flags->kernel,
diff --git a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
index fbb8863454..31cb50e9da 100644
--- a/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
+++ b/tensorflow/compiler/xla/legacy_flags/gpu_backend_lib_flags.h
@@ -36,7 +36,6 @@ typedef struct {
   string dump_temp_products_to; // temporary compilation products dir
   bool ftz; // flush to zero semantics
   bool fma; // use FMA synthesis
-  string gpu_architecture; // GPU architecture
   bool verbose_ptx_asm; // emit PTX assembly with extra comments
   string kernel; // only emit the IR and PTX for this kernel
   bool llvm_dump_passes; // dump the passes LLVM runs to stderr
```