Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/cpu_compiler.cc')
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 13
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 2df959c4dc..35154af048 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -88,6 +88,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
#include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
#include "tensorflow/compiler/xla/service/reshape_mover.h"
+#include "tensorflow/compiler/xla/service/scatter_expander.h"
#include "tensorflow/compiler/xla/service/transpose_folding.h"
#include "tensorflow/compiler/xla/service/tuple_simplifier.h"
#include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h"
@@ -299,6 +300,8 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile,
pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/false);
pipeline.AddPass<CpuInstructionFusion>();

+ pipeline.AddPass<ScatterExpander>();
+
ReducePrecisionInsertion::AddPasses(
&pipeline, module->config().debug_options(),
ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
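
The hunk above is the substantive change: it registers ScatterExpander in the CPU backend's HLO pass pipeline right after instruction fusion, so scatter operations are rewritten into an equivalent loop of primitives the CPU emitter already handles. A minimal sketch of the registration idiom, assuming the XLA headers from this tree (the wrapper function is illustrative, not part of the commit):

  #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
  #include "tensorflow/compiler/xla/service/scatter_expander.h"

  // Illustrative only: appends the scatter-expansion pass to an existing
  // pipeline, mirroring the AddPass<ScatterExpander>() call added above.
  void AddScatterExpansion(xla::HloPassPipeline* pipeline) {
    pipeline->AddPass<xla::ScatterExpander>();
  }
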
@@ -356,7 +359,7 @@ llvm::TargetOptions CompilerTargetOptions(
llvm::TargetOptions target_options;
llvm_ir::SetTargetOptions(
/*fast_math_enabled=*/module_config.debug_options()
- .xla_enable_fast_math(),
+ .xla_cpu_enable_fast_math(),
&target_options);
return target_options;
}
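
The changed lines in this and the following hunks are one mechanical rename: the DebugOptions field xla_enable_fast_math becomes xla_cpu_enable_fast_math, and each CPU-side read is updated to the new proto accessor. A hedged sketch of flipping the renamed flag (the header path and GetDebugOptionsFromFlags() are assumptions about this revision of the tree):

  #include "tensorflow/compiler/xla/debug_options_flags.h"  // assumed path

  // DebugOptions is a protobuf message, so renaming the field renames the
  // generated accessors: xla_enable_fast_math() -> xla_cpu_enable_fast_math().
  xla::DebugOptions FastMathOptions(bool enable) {
    xla::DebugOptions opts = xla::GetDebugOptionsFromFlags();
    opts.set_xla_cpu_enable_fast_math(enable);
    return opts;
  }
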
@@ -523,7 +526,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
CompilerTargetOptions(module->config()),
CodeGenOptLevel(module->config()),
options::OptimizeForSizeRequested(module->config()),
- module->config().debug_options().xla_enable_fast_math(),
+ module->config().debug_options().xla_cpu_enable_fast_math(),
module->config().debug_options().xla_llvm_disable_expensive_passes(),
pre_optimization_ir_hook, post_optimization_ir_hook);
llvm_module->setDataLayout(jit->data_layout());
@@ -653,9 +656,9 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
// so we bail if the configs have conflicting flags. At the moment, the only
// flag that needs to be consistent is fast-math.
const bool fast_math_enabled =
- modules[0]->config().debug_options().xla_enable_fast_math();
+ modules[0]->config().debug_options().xla_cpu_enable_fast_math();
for (const auto& module : modules) {
- if (module->config().debug_options().xla_enable_fast_math() !=
+ if (module->config().debug_options().xla_cpu_enable_fast_math() !=
fast_math_enabled) {
return InvalidArgument(
"All HLO module configs must have the same value for "
@@ -832,7 +835,7 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
CompilerFunctor compiler_functor(
target_machine.get(), &disassembler, opt_level,
options::OptimizeForSizeRequested(module->config()),
- module->config().debug_options().xla_enable_fast_math(),
+ module->config().debug_options().xla_cpu_enable_fast_math(),
module->config().debug_options().xla_llvm_disable_expensive_passes(),
pre_optimization_ir_dump_hook, post_optimization_ir_dump_hook);
std::unique_ptr<llvm::MemoryBuffer> object_file =