From 3f888e1539db5551cfcf9ee837a0555c224e0018 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 14:16:09 -0800
Subject: Add a Compiler::BuildExecutable interface that compiles the given Hlo
 module without optimizations.

PiperOrigin-RevId: 176158846
---
 tensorflow/compiler/xla/service/compiler.h         | 17 +++++++++++---
 .../compiler/xla/service/cpu/cpu_compiler.cc       | 26 +++++++++++++---------
 tensorflow/compiler/xla/service/cpu/cpu_compiler.h |  6 ++++-
 .../compiler/xla/service/gpu/gpu_compiler.cc       | 22 ++++++++++--------
 tensorflow/compiler/xla/service/gpu/gpu_compiler.h |  6 ++++-
 tensorflow/compiler/xla/service/hlo_runner.cc      | 19 +++++++++++-----
 tensorflow/compiler/xla/service/hlo_runner.h       | 15 ++++++++-----
 .../compiler/xla/service/interpreter/compiler.cc   | 12 ++++++++--
 .../compiler/xla/service/interpreter/compiler.h    |  8 +++++--
 tensorflow/compiler/xla/service/llvm_compiler.cc   |  4 +++-
 tensorflow/compiler/xla/service/llvm_compiler.h    | 12 ++++++----
 tensorflow/compiler/xla/service/service.cc         |  5 ++++-
 tensorflow/compiler/xla/tests/codegen_test_base.cc |  7 ++++--
 .../compiler/xla/tests/llvm_compiler_test.cc       |  4 ++--
 14 files changed, 114 insertions(+), 49 deletions(-)

diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h
index 5f021900c8..fc67330f5c 100644
--- a/tensorflow/compiler/xla/service/compiler.h
+++ b/tensorflow/compiler/xla/service/compiler.h
@@ -97,21 +97,32 @@ class Compiler {
   // Returns the ID of the platform that this compiler targets.
   virtual perftools::gputools::Platform::Id PlatformId() const = 0;
 
+  // Runs Hlo passes to optimize the given Hlo module, returns the optimized
+  // module.
+  virtual StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> module,
+      perftools::gputools::StreamExecutor* executor) = 0;
+
   // Compiles the HLO module for execution on a device given by the executor,
-  // and returns an executable object or an error status. Takes ownership of the
-  // HLO module and is free to transform it.
+  // and returns an executable object or an error status. No HLO passes are
+  // applied to the module. Generally a module should be passed through RunHloPasses
+  // prior to calling this method because some HLO passes are required for
+  // correctness. Takes ownership of the HLO module and is free to transform it.
   //
   // The compiler may optionally specialize to the individual device
   // (not just type of device) indicated by the executor.
   //
   // Use the overload below to compile computations that run in parallel.
-  virtual StatusOr<std::unique_ptr<Executable>> Compile(
+  virtual StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module,
       perftools::gputools::StreamExecutor* executor) = 0;
 
   // Compiles a set of HLO modules that can run in parallel, potentially
   // communicating data between the modules, and returns a corresponding
   // sequence of executable objects.
+  //
+  // TODO(b/68666782): Remove this method after adding support for multiple
+  // modules to RunHloPasses and RunBackend.
   virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
       std::vector<std::unique_ptr<HloModule>> modules,
       std::vector<std::vector<perftools::gputools::StreamExecutor*>>
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index f5b95d3657..b04a279395 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -426,8 +426,22 @@ Status InitializeModuleHooks(
 
 }  // namespace
 
-StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
-    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
+StatusOr<std::unique_ptr<HloModule>> CpuCompiler::RunHloPasses(
+    std::unique_ptr<HloModule> module,
+    perftools::gputools::StreamExecutor* /*stream_exec*/) {
+  VLOG(2) << "Before optimization:";
+  XLA_VLOG_LINES(2, module->ToString());
+
+  TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false));
+
+  VLOG(2) << "After optimization:";
+  XLA_VLOG_LINES(2, module->ToString());
+  return std::move(module);
+}
+
+StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
+    std::unique_ptr<HloModule> module,
+    perftools::gputools::StreamExecutor* stream_exec) {
   const string timer_message =
       "Compiling [" + module->name() + "] for CPU using JIT";
   ScopedLoggingTimer compiling_timer(timer_message, 1);
@@ -458,14 +472,6 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
   llvm_module->setDataLayout(jit->data_layout());
   llvm_module->setTargetTriple(jit->target_triple().getTriple());
 
-  VLOG(2) << "Before optimization:";
-  XLA_VLOG_LINES(2, module->ToString());
-
-  TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false));
-
-  VLOG(2) << "After optimization:";
-  XLA_VLOG_LINES(2, module->ToString());
-
   HloComputation* computation = module->entry_computation();
   std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx;
   if (module->config().hlo_profiling_enabled()) {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
index 963aced208..ebed7058d8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
@@ -116,7 +116,11 @@ class CpuCompiler : public LLVMCompiler {
   //   stream_execs)
   using LLVMCompiler::Compile;
 
-  StatusOr<std::unique_ptr<Executable>> Compile(
+  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> module,
+      perftools::gputools::StreamExecutor* stream_exec) override;
+
+  StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module,
       perftools::gputools::StreamExecutor* stream_exec) override;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 23fb308ec6..937d453a5c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -126,7 +126,7 @@ string GetLibdeviceDir(const string& config_cuda_data_dir) {
 
 // Runs optimization passes on the given HLO module.
 tensorflow::Status OptimizeHloModule(
-    HloModule* hlo_module, const se::DeviceDescription& device_desc,
+    HloModule* hlo_module,
     const HloCostAnalysis::ShapeSizeFunction& shape_size_function) {
   {
     HloPassPipeline pipeline("optimization");
@@ -297,19 +297,23 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
 GpuCompiler::GpuCompiler()
     : pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {}
 
-StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
-    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
-  TF_RET_CHECK(stream_exec != nullptr);
-
+StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses(
+    std::unique_ptr<HloModule> module, se::StreamExecutor* /*stream_exec*/) {
   {
     Tracing::TraceMe annotation("HLO Transforms", module->name(),
                                 /*is_expensive=*/true);
-    TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(),
-                                         stream_exec->GetDeviceDescription(),
-                                         ShapeSizeBytesFunction()));
     TF_RETURN_IF_ERROR(
-        PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
+        OptimizeHloModule(module.get(), ShapeSizeBytesFunction()));
   }
+  return std::move(module);
+}
+
+StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
+    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
+  TF_RET_CHECK(stream_exec != nullptr);
+
+  TF_RETURN_IF_ERROR(
+      PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
 
   llvm::LLVMContext llvm_context;
   std::string buffer;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
index fe5fce615f..18e3434020 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
@@ -49,7 +49,11 @@ class GpuCompiler : public LLVMCompiler {
   //   stream_execs)
   using LLVMCompiler::Compile;
 
-  StatusOr<std::unique_ptr<Executable>> Compile(
+  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> module,
+      perftools::gputools::StreamExecutor* stream_exec) override;
+
+  StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module,
       perftools::gputools::StreamExecutor* stream_exec) override;
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index 63f2b1296e..6b6d48233a 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -114,11 +114,16 @@ HloRunner::~HloRunner() {
 StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
     std::unique_ptr<HloModule> module,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    Shape* result_shape) {
+    Shape* result_shape, bool run_hlo_passes) {
+  if (run_hlo_passes) {
+    TF_ASSIGN_OR_RETURN(
+        module, backend().compiler()->RunHloPasses(
+                    std::move(module), backend().default_stream_executor()));
+  }
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<Executable> executable,
-      backend().compiler()->Compile(std::move(module),
-                                    backend().default_stream_executor()));
+      backend().compiler()->RunBackend(std::move(module),
+                                       backend().default_stream_executor()));
 
   se::Stream stream(backend().default_stream_executor());
   stream.Init();
@@ -193,10 +198,12 @@ StatusOr<std::unique_ptr<Literal>> HloRunner::TransferFromDevice(
 
 StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteAndTransfer(
     std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
+    bool run_hlo_passes) {
   Shape result_shape;
-  TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase device_base,
-                      Execute(std::move(module), arguments, &result_shape));
+  TF_ASSIGN_OR_RETURN(
+      se::DeviceMemoryBase device_base,
+      Execute(std::move(module), arguments, &result_shape, run_hlo_passes));
   return TransferFromDevice(result_shape, device_base);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h
index a5732848c6..95cddafc91 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.h
+++ b/tensorflow/compiler/xla/service/hlo_runner.h
@@ -65,17 +65,20 @@ class HloRunner {
   // Executes the given module with given literals as input and returns the
   // result as a Literal. The LiteralPtr type accepts Literal* or
   // std::unique_ptr<Literal>.
+  // If run_hlo_passes is false, the module will be executed without Hlo
+  // optimization.
   template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Execute(
       std::unique_ptr<HloModule> module,
-      const tensorflow::gtl::ArraySlice<LiteralPtr> literals);
+      const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+      bool run_hlo_passes = true);
 
   // Executes the given module and returns a global data handle.
   StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
       std::unique_ptr<HloModule> module,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
           arguments,
-      Shape* result_shape);
+      Shape* result_shape, bool run_hlo_passes = true);
 
   // Transfers the given literal to the device and returns the data handle.
   StatusOr<perftools::gputools::DeviceMemoryBase> TransferToDevice(
@@ -90,7 +93,8 @@ class HloRunner {
   StatusOr<std::unique_ptr<Literal>> ExecuteAndTransfer(
       std::unique_ptr<HloModule> module,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments);
+          arguments,
+      bool run_hlo_passes = true);
 
   // If backend is not created in the constructor, creates and returns the
   // default backend. If creation fails, crashes the program.
@@ -112,14 +116,15 @@ class HloRunner {
 template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloRunner::Execute(
     std::unique_ptr<HloModule> module,
-    const tensorflow::gtl::ArraySlice<LiteralPtr> literals) {
+    const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+    bool run_hlo_passes) {
   std::vector<perftools::gputools::DeviceMemoryBase> arguments;
   for (const auto& literal : literals) {
     TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument,
                         TransferToDevice(*literal));
     arguments.push_back(argument);
   }
-  return ExecuteAndTransfer(std::move(module), arguments);
+  return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 6d5796a24b..c9a5285a4f 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -69,11 +69,19 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) {
   return pipeline.Run(hlo_module).status();
 }
 
-StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::Compile(
+StatusOr<std::unique_ptr<HloModule>> InterpreterCompiler::RunHloPasses(
+    std::unique_ptr<HloModule> hlo_module,
+    se::StreamExecutor* /*stream_exec*/) {
+  VLOG(1) << "Run hlo passes on graph " << hlo_module->name();
+  TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
+  return std::move(hlo_module);
+}
+
+StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
     std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec) {
   TF_RET_CHECK(stream_exec != nullptr);
 
-  VLOG(1) << "Generate graph " << hlo_module->name();
+  VLOG(1) << "Run backend " << hlo_module->name();
 
   TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h
index cfdc9b6256..278cf51842 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.h
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.h
@@ -43,8 +43,12 @@ class InterpreterCompiler : public Compiler {
   InterpreterCompiler() {}
   ~InterpreterCompiler() override {}
 
-  StatusOr<std::unique_ptr<Executable>> Compile(
-      std::unique_ptr<HloModule> hlo_modules,
+  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> hlo_module,
+      perftools::gputools::StreamExecutor* stream_exec) override;
+
+  StatusOr<std::unique_ptr<Executable>> RunBackend(
+      std::unique_ptr<HloModule> hlo_module,
       perftools::gputools::StreamExecutor* stream_exec) override;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc
index ba0304fb8c..34f3419269 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.cc
+++ b/tensorflow/compiler/xla/service/llvm_compiler.cc
@@ -27,8 +27,10 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
           "Model partitioning not implemented for the CPU/GPU compilers!");
     }
 
+    TF_ASSIGN_OR_RETURN(
+        modules[i], RunHloPasses(std::move(modules[i]), stream_execs[i][0]));
     TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
-                        Compile(std::move(modules[i]), stream_execs[i][0]));
+                        RunBackend(std::move(modules[i]), stream_execs[i][0]));
     result.push_back(std::move(executable));
   }
 
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h
index c4f689eabe..c5393cef4f 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.h
+++ b/tensorflow/compiler/xla/service/llvm_compiler.h
@@ -58,10 +58,14 @@ class LLVMCompiler : public Compiler {
   void RemovePostOptimizationHook() { user_post_optimization_hook_ = nullptr; }
 
   // Bring in
-  //   StatusOr<std::unique_ptr<Executable>> Compile(
-  //       std::unique_ptr<HloModule> module,
-  //       perftools::gputools::StreamExecutor* executor)
-  using Compiler::Compile;
+  //   StatusOr<std::unique_ptr<Executable>> RunBackend(
+  //       std::unique_ptr<HloModule> module,
+  //       perftools::gputools::StreamExecutor* stream_exec)
+  //   StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+  //       std::unique_ptr<HloModule> module,
+  //       perftools::gputools::StreamExecutor* stream_exec)
+  using Compiler::RunBackend;
+  using Compiler::RunHloPasses;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
       std::vector<std::unique_ptr<HloModule>> modules,
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index ee9501dd48..0544a1697b 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -430,9 +430,12 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
                       /*include_unreachable_instructions=*/
                       true));
 
+  TF_ASSIGN_OR_RETURN(
+      module, backend->compiler()->RunHloPasses(std::move(module), executor));
+
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<Executable> executable,
-      backend->compiler()->Compile(std::move(module), executor));
+      backend->compiler()->RunBackend(std::move(module), executor));
 
   if (!other_directory_path.empty()) {
     executable->set_session_module(std::move(session_module));
diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc
index 43ea7f6019..e472408dcf 100644
--- a/tensorflow/compiler/xla/tests/codegen_test_base.cc
+++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc
@@ -19,8 +19,11 @@ namespace xla {
 
 StatusOr<std::unique_ptr<Executable>> CodegenTestBase::CompileToExecutable(
     std::unique_ptr<HloModule> hlo_module) {
-  return backend().compiler()->Compile(std::move(hlo_module),
-                                       backend().default_stream_executor());
+  TF_ASSIGN_OR_RETURN(hlo_module, backend().compiler()->RunHloPasses(
+                                      std::move(hlo_module),
+                                      backend().default_stream_executor()));
+  return backend().compiler()->RunBackend(std::move(hlo_module),
+                                          backend().default_stream_executor());
 }
 
 StatusOr<std::unique_ptr<AotCompilationResult>>
diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
index 62fab6a224..b5b95967ff 100644
--- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
+++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
@@ -73,8 +73,8 @@ class LLVMCompilerTest : public ::testing::Test {
     compiler->SetPostOptimizationHook(post_opt_hook);
 
     ASSERT_TRUE(compiler
-                    ->Compile(std::move(hlo_module),
-                              backend_->default_stream_executor())
+                    ->RunBackend(std::move(hlo_module),
+                                 backend_->default_stream_executor())
                     .ok());
 
     // Test that hooks were called.
-- 
cgit v1.2.3
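
For readers following the API change, here is a minimal sketch of how a caller drives the split interface, mirroring the updated call sites in Service::BuildExecutable and HloRunner::Execute above. The helper name BuildWithOptionalPasses and its parameter list are illustrative only and not part of the patch; TF_ASSIGN_OR_RETURN and the XLA types are used as they appear in the diff.

#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/statusor.h"

namespace xla {

// Illustrative helper (not in the patch): optimize-then-lower, with the
// optimization phase made optional by the new split interface.
StatusOr<std::unique_ptr<Executable>> BuildWithOptionalPasses(
    Compiler* compiler, std::unique_ptr<HloModule> module,
    perftools::gputools::StreamExecutor* executor, bool run_hlo_passes) {
  if (run_hlo_passes) {
    // Phase 1: run the HLO optimization pipeline. Per the comment added to
    // compiler.h, skipping this is allowed but some passes are required for
    // correctness, so callers (e.g. codegen tests) skip it at their own risk.
    TF_ASSIGN_OR_RETURN(module,
                        compiler->RunHloPasses(std::move(module), executor));
  }
  // Phase 2: lower the (possibly unoptimized) module to an Executable.
  // No HLO passes are applied here.
  return compiler->RunBackend(std::move(module), executor);
}

}  // namespace xla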