diff options
author | 2017-11-17 14:16:09 -0800 | |
---|---|---|
committer | 2017-11-17 14:20:28 -0800 | |
commit | 3f888e1539db5551cfcf9ee837a0555c224e0018 (patch) | |
tree | 5f2df45e666fc15e370e6c029bf0712ee65d53ed /tensorflow/compiler/xla/service | |
parent | d79dd4993061670c1ec5ea01db3022f28d72d0a3 (diff) |
Add a Compiler::BuildExecutable interface that compiles the given Hlo module without optimizations.
PiperOrigin-RevId: 176158846
Diffstat (limited to 'tensorflow/compiler/xla/service')
-rw-r--r-- | tensorflow/compiler/xla/service/compiler.h | 17 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 26 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/cpu/cpu_compiler.h | 6 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 22 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/gpu_compiler.h | 6 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/hlo_runner.cc | 19 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/hlo_runner.h | 15 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/interpreter/compiler.cc | 12 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/interpreter/compiler.h | 8 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/llvm_compiler.cc | 4 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/llvm_compiler.h | 12 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/service.cc | 5 |
12 files changed, 107 insertions, 45 deletions
diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 5f021900c8..fc67330f5c 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -97,21 +97,32 @@ class Compiler { // Returns the ID of the platform that this compiler targets. virtual perftools::gputools::Platform::Id PlatformId() const = 0; + // Runs Hlo passes to optimize the given Hlo module, returns the optimized + // module. + virtual StatusOr<std::unique_ptr<HloModule>> RunHloPasses( + std::unique_ptr<HloModule> module, + perftools::gputools::StreamExecutor* executor) = 0; + // Compiles the HLO module for execution on a device given by the executor, - // and returns an executable object or an error status. Takes ownership of the - // HLO module and is free to transform it. + // and returns an executable object or an error status. No HLO passes are + // applied to module. Generally a module should be passed through RunHloPasses + // prior to calling this method because some HLO passes are required for + // correctness. Takes ownership of the HLO module and is free to transform it. // // The compiler may optionally specialize to the individual device // (not just type of device) indicated by the executor. // // Use the overload below to compile computations that run in parallel. - virtual StatusOr<std::unique_ptr<Executable>> Compile( + virtual StatusOr<std::unique_ptr<Executable>> RunBackend( std::unique_ptr<HloModule> module, perftools::gputools::StreamExecutor* executor) = 0; // Compiles a set of HLO modules that can run in parallel, potentially // communicating data between the modules, and returns a corresponding // sequence of executable objects. + // + // TODO(b/68666782): Remove this method after adding support for multiple + // modules to RunHloPasses and RunBackend. 
virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile( std::vector<std::unique_ptr<HloModule>> modules, std::vector<std::vector<perftools::gputools::StreamExecutor*>> diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index f5b95d3657..b04a279395 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -426,8 +426,22 @@ Status InitializeModuleHooks( } // namespace -StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile( - std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) { +StatusOr<std::unique_ptr<HloModule>> CpuCompiler::RunHloPasses( + std::unique_ptr<HloModule> module, + perftools::gputools::StreamExecutor* /*stream_exec*/) { + VLOG(2) << "Before optimization:"; + XLA_VLOG_LINES(2, module->ToString()); + + TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false)); + + VLOG(2) << "After optimization:"; + XLA_VLOG_LINES(2, module->ToString()); + return std::move(module); +} + +StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend( + std::unique_ptr<HloModule> module, + perftools::gputools::StreamExecutor* stream_exec) { const string timer_message = "Compiling [" + module->name() + "] for CPU using JIT"; ScopedLoggingTimer compiling_timer(timer_message, 1); @@ -458,14 +472,6 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile( llvm_module->setDataLayout(jit->data_layout()); llvm_module->setTargetTriple(jit->target_triple().getTriple()); - VLOG(2) << "Before optimization:"; - XLA_VLOG_LINES(2, module->ToString()); - - TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false)); - - VLOG(2) << "After optimization:"; - XLA_VLOG_LINES(2, module->ToString()); - HloComputation* computation = module->entry_computation(); std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx; if (module->config().hlo_profiling_enabled()) { diff --git 
a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h index 963aced208..ebed7058d8 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h @@ -116,7 +116,11 @@ class CpuCompiler : public LLVMCompiler { // stream_execs) using LLVMCompiler::Compile; - StatusOr<std::unique_ptr<Executable>> Compile( + StatusOr<std::unique_ptr<HloModule>> RunHloPasses( + std::unique_ptr<HloModule> module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr<std::unique_ptr<Executable>> RunBackend( std::unique_ptr<HloModule> module, perftools::gputools::StreamExecutor* stream_exec) override; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 23fb308ec6..937d453a5c 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -126,7 +126,7 @@ string GetLibdeviceDir(const string& config_cuda_data_dir) { // Runs optimization passes on the given HLO module. 
tensorflow::Status OptimizeHloModule( - HloModule* hlo_module, const se::DeviceDescription& device_desc, + HloModule* hlo_module, const HloCostAnalysis::ShapeSizeFunction& shape_size_function) { { HloPassPipeline pipeline("optimization"); @@ -297,19 +297,23 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major, GpuCompiler::GpuCompiler() : pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {} -StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile( - std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) { - TF_RET_CHECK(stream_exec != nullptr); - +StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses( + std::unique_ptr<HloModule> module, se::StreamExecutor* /*stream_exec*/) { { Tracing::TraceMe annotation("HLO Transforms", module->name(), /*is_expensive=*/true); - TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), - stream_exec->GetDeviceDescription(), - ShapeSizeBytesFunction())); TF_RETURN_IF_ERROR( - PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); + OptimizeHloModule(module.get(), ShapeSizeBytesFunction())); } + return std::move(module); +} + +StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend( + std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) { + TF_RET_CHECK(stream_exec != nullptr); + + TF_RETURN_IF_ERROR( + PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); llvm::LLVMContext llvm_context; std::string buffer; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h index fe5fce615f..18e3434020 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h @@ -49,7 +49,11 @@ class GpuCompiler : public LLVMCompiler { // stream_execs) using LLVMCompiler::Compile; - StatusOr<std::unique_ptr<Executable>> Compile( + StatusOr<std::unique_ptr<HloModule>> RunHloPasses( + std::unique_ptr<HloModule> module, + 
perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr<std::unique_ptr<Executable>> RunBackend( std::unique_ptr<HloModule> module, perftools::gputools::StreamExecutor* stream_exec) override; diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 63f2b1296e..6b6d48233a 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -114,11 +114,16 @@ HloRunner::~HloRunner() { StatusOr<se::DeviceMemoryBase> HloRunner::Execute( std::unique_ptr<HloModule> module, tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments, - Shape* result_shape) { + Shape* result_shape, bool run_hlo_passes) { + if (run_hlo_passes) { + TF_ASSIGN_OR_RETURN( + module, backend().compiler()->RunHloPasses( + std::move(module), backend().default_stream_executor())); + } TF_ASSIGN_OR_RETURN( std::unique_ptr<Executable> executable, - backend().compiler()->Compile(std::move(module), - backend().default_stream_executor())); + backend().compiler()->RunBackend(std::move(module), + backend().default_stream_executor())); se::Stream stream(backend().default_stream_executor()); stream.Init(); @@ -193,10 +198,12 @@ StatusOr<std::unique_ptr<Literal>> HloRunner::TransferFromDevice( StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteAndTransfer( std::unique_ptr<HloModule> module, - tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) { + tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments, + bool run_hlo_passes) { Shape result_shape; - TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase device_base, - Execute(std::move(module), arguments, &result_shape)); + TF_ASSIGN_OR_RETURN( + se::DeviceMemoryBase device_base, + Execute(std::move(module), arguments, &result_shape, run_hlo_passes)); return TransferFromDevice(result_shape, device_base); } diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index a5732848c6..95cddafc91 
100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -65,17 +65,20 @@ class HloRunner { // Executes the given module with given literals as input and returns the // result as a Literal. The LiteralPtr type accepts Literal* or // std::unique_ptr<Literal>. + // If run_hlo_passes is false, the module will be executed without Hlo + // optimization. template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> Execute( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<LiteralPtr> literals); + const tensorflow::gtl::ArraySlice<LiteralPtr> literals, + bool run_hlo_passes = true); // Executes the given module and returns a global data handle. StatusOr<perftools::gputools::DeviceMemoryBase> Execute( std::unique_ptr<HloModule> module, tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase> arguments, - Shape* result_shape); + Shape* result_shape, bool run_hlo_passes = true); // Transfers the given literal to the device and returns the data handle. StatusOr<perftools::gputools::DeviceMemoryBase> TransferToDevice( @@ -90,7 +93,8 @@ class HloRunner { StatusOr<std::unique_ptr<Literal>> ExecuteAndTransfer( std::unique_ptr<HloModule> module, tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase> - arguments); + arguments, + bool run_hlo_passes = true); // If backend is not created in the constructor, creates and returns the // default backend. If creation fails, crashes the program. 
@@ -112,14 +116,15 @@ class HloRunner { template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> HloRunner::Execute( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<LiteralPtr> literals) { + const tensorflow::gtl::ArraySlice<LiteralPtr> literals, + bool run_hlo_passes) { std::vector<perftools::gputools::DeviceMemoryBase> arguments; for (const auto& literal : literals) { TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument, TransferToDevice(*literal)); arguments.push_back(argument); } - return ExecuteAndTransfer(std::move(module), arguments); + return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index 6d5796a24b..c9a5285a4f 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -69,11 +69,19 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) { return pipeline.Run(hlo_module).status(); } -StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::Compile( +StatusOr<std::unique_ptr<HloModule>> InterpreterCompiler::RunHloPasses( + std::unique_ptr<HloModule> hlo_module, + se::StreamExecutor* /*stream_exec*/) { + VLOG(1) << "Run hlo passes on graph " << hlo_module->name(); + TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get())); + return std::move(hlo_module); +} + +StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend( std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec) { TF_RET_CHECK(stream_exec != nullptr); - VLOG(1) << "Generate graph " << hlo_module->name(); + VLOG(1) << "Run backend " << hlo_module->name(); TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get())); diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h index cfdc9b6256..278cf51842 
100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.h +++ b/tensorflow/compiler/xla/service/interpreter/compiler.h @@ -43,8 +43,12 @@ class InterpreterCompiler : public Compiler { InterpreterCompiler() {} ~InterpreterCompiler() override {} - StatusOr<std::unique_ptr<Executable>> Compile( - std::unique_ptr<HloModule> hlo_modules, + StatusOr<std::unique_ptr<HloModule>> RunHloPasses( + std::unique_ptr<HloModule> hlo_module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr<std::unique_ptr<Executable>> RunBackend( + std::unique_ptr<HloModule> hlo_module, perftools::gputools::StreamExecutor* stream_exec) override; StatusOr<std::vector<std::unique_ptr<Executable>>> Compile( diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc index ba0304fb8c..34f3419269 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.cc +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -27,8 +27,10 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile( "Model partitioning not implemented for the CPU/GPU compilers!"); } + TF_ASSIGN_OR_RETURN( + modules[i], RunHloPasses(std::move(modules[i]), stream_execs[i][0])); TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable, - Compile(std::move(modules[i]), stream_execs[i][0])); + RunBackend(std::move(modules[i]), stream_execs[i][0])); result.push_back(std::move(executable)); } diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h index c4f689eabe..c5393cef4f 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.h +++ b/tensorflow/compiler/xla/service/llvm_compiler.h @@ -58,10 +58,14 @@ class LLVMCompiler : public Compiler { void RemovePostOptimizationHook() { user_post_optimization_hook_ = nullptr; } // Bring in - // StatusOr<std::unique_ptr<Executable>> Compile( - // std::unique_ptr<HloModule> module, - // perftools::gputools::StreamExecutor* executor) - 
using Compiler::Compile; + // StatusOr<std::unique_ptr<Executable>> RunBackend( + // std::unique_ptr<HloModule> module, + // perftools::gputools::StreamExecutor* stream_exec) + // StatusOr<std::unique_ptr<HloModule>> RunHloPasses( + // std::unique_ptr<HloModule> module, + // perftools::gputools::StreamExecutor* stream_exec) + using Compiler::RunBackend; + using Compiler::RunHloPasses; StatusOr<std::vector<std::unique_ptr<Executable>>> Compile( std::vector<std::unique_ptr<HloModule>> modules, diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index ee9501dd48..0544a1697b 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -431,8 +431,11 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable( true)); TF_ASSIGN_OR_RETURN( + module, backend->compiler()->RunHloPasses(std::move(module), executor)); + + TF_ASSIGN_OR_RETURN( std::unique_ptr<Executable> executable, - backend->compiler()->Compile(std::move(module), executor)); + backend->compiler()->RunBackend(std::move(module), executor)); if (!other_directory_path.empty()) { executable->set_session_module(std::move(session_module)); |