author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-11-17 14:16:09 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-11-17 14:20:28 -0800
commit	3f888e1539db5551cfcf9ee837a0555c224e0018 (patch)
tree	5f2df45e666fc15e370e6c029bf0712ee65d53ed /tensorflow/compiler/xla/service
parent	d79dd4993061670c1ec5ea01db3022f28d72d0a3 (diff)
Add a Compiler::RunBackend interface that compiles the given Hlo module without optimizations, and a Compiler::RunHloPasses interface that runs the HLO optimization passes.
PiperOrigin-RevId: 176158846
Diffstat (limited to 'tensorflow/compiler/xla/service')
-rw-r--r--  tensorflow/compiler/xla/service/compiler.h               | 17
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_compiler.cc      | 26
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_compiler.h       |  6
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_compiler.cc      | 22
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_compiler.h       |  6
-rw-r--r--  tensorflow/compiler/xla/service/hlo_runner.cc            | 19
-rw-r--r--  tensorflow/compiler/xla/service/hlo_runner.h             | 15
-rw-r--r--  tensorflow/compiler/xla/service/interpreter/compiler.cc  | 12
-rw-r--r--  tensorflow/compiler/xla/service/interpreter/compiler.h   |  8
-rw-r--r--  tensorflow/compiler/xla/service/llvm_compiler.cc         |  4
-rw-r--r--  tensorflow/compiler/xla/service/llvm_compiler.h          | 12
-rw-r--r--  tensorflow/compiler/xla/service/service.cc               |  5
12 files changed, 107 insertions, 45 deletions
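Taken together, the patch splits the old single-step Compiler::Compile into two phases: RunHloPasses optimizes the module, RunBackend lowers it to an executable. A minimal sketch of driving the new interface, assuming a compiler, a module, and an executor are already in hand (the helper name CompileModule is hypothetical, not part of this patch):

    // Sketch only: reproduces the old single-module Compile() behavior on
    // top of the new two-phase interface. All names here are placeholders.
    StatusOr<std::unique_ptr<Executable>> CompileModule(
        Compiler* compiler, std::unique_ptr<HloModule> module,
        perftools::gputools::StreamExecutor* executor) {
      // Phase 1: run the HLO optimization pipeline over the module.
      TF_ASSIGN_OR_RETURN(
          module, compiler->RunHloPasses(std::move(module), executor));
      // Phase 2: lower the optimized module to a device executable.
      // No HLO passes run in this phase.
      return compiler->RunBackend(std::move(module), executor);
    }

This mirrors what Service::BuildExecutable and LLVMCompiler::Compile do in the hunks below.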
diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h
index 5f021900c8..fc67330f5c 100644
--- a/tensorflow/compiler/xla/service/compiler.h
+++ b/tensorflow/compiler/xla/service/compiler.h
@@ -97,21 +97,32 @@ class Compiler {
// Returns the ID of the platform that this compiler targets.
virtual perftools::gputools::Platform::Id PlatformId() const = 0;
+ // Runs HLO passes to optimize the given HLO module and returns the
+ // optimized module.
+ virtual StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+ std::unique_ptr<HloModule> module,
+ perftools::gputools::StreamExecutor* executor) = 0;
+
// Compiles the HLO module for execution on a device given by the executor,
- // and returns an executable object or an error status. Takes ownership of the
- // HLO module and is free to transform it.
+ // and returns an executable object or an error status. No HLO passes are
+ // applied to the module. Generally a module should be passed through
+ // RunHloPasses prior to calling this method because some HLO passes are
+ // required for correctness. Takes ownership of the HLO module and is free
+ // to transform it.
//
// The compiler may optionally specialize to the individual device
// (not just type of device) indicated by the executor.
//
// Use the overload below to compile computations that run in parallel.
- virtual StatusOr<std::unique_ptr<Executable>> Compile(
+ virtual StatusOr<std::unique_ptr<Executable>> RunBackend(
std::unique_ptr<HloModule> module,
perftools::gputools::StreamExecutor* executor) = 0;
// Compiles a set of HLO modules that can run in parallel, potentially
// communicating data between the modules, and returns a corresponding
// sequence of executable objects.
+ //
+ // TODO(b/68666782): Remove this method after adding support for multiple
+ // modules to RunHloPasses and RunBackend.
virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
std::vector<std::unique_ptr<HloModule>> modules,
std::vector<std::vector<perftools::gputools::StreamExecutor*>>
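A backend implementing this contract would override both methods roughly as follows; MyCompiler and its helpers are hypothetical, shown only to illustrate the split (the real CPU, GPU, and interpreter implementations appear below):

    // Hypothetical backend sketch. OptimizeModule() and LowerToExecutable()
    // stand in for backend-specific logic and do not exist in this patch.
    class MyCompiler : public Compiler {
     public:
      StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
          std::unique_ptr<HloModule> module,
          perftools::gputools::StreamExecutor* /*executor*/) override {
        // Optimize in place and hand ownership of the module back.
        TF_RETURN_IF_ERROR(OptimizeModule(module.get()));
        return std::move(module);
      }
      StatusOr<std::unique_ptr<Executable>> RunBackend(
          std::unique_ptr<HloModule> module,
          perftools::gputools::StreamExecutor* executor) override {
        // Lower the already-optimized module; apply no HLO passes here.
        return LowerToExecutable(std::move(module), executor);
      }
      // ...remaining Compiler overrides elided...
    };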
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index f5b95d3657..b04a279395 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -426,8 +426,22 @@ Status InitializeModuleHooks(
} // namespace
-StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
- std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
+StatusOr<std::unique_ptr<HloModule>> CpuCompiler::RunHloPasses(
+ std::unique_ptr<HloModule> module,
+ perftools::gputools::StreamExecutor* /*stream_exec*/) {
+ VLOG(2) << "Before optimization:";
+ XLA_VLOG_LINES(2, module->ToString());
+
+ TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false));
+
+ VLOG(2) << "After optimization:";
+ XLA_VLOG_LINES(2, module->ToString());
+ return std::move(module);
+}
+
+StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
+ std::unique_ptr<HloModule> module,
+ perftools::gputools::StreamExecutor* stream_exec) {
const string timer_message =
"Compiling [" + module->name() + "] for CPU using JIT";
ScopedLoggingTimer compiling_timer(timer_message, 1);
@@ -458,14 +472,6 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
llvm_module->setDataLayout(jit->data_layout());
llvm_module->setTargetTriple(jit->target_triple().getTriple());
- VLOG(2) << "Before optimization:";
- XLA_VLOG_LINES(2, module->ToString());
-
- TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false));
-
- VLOG(2) << "After optimization:";
- XLA_VLOG_LINES(2, module->ToString());
-
HloComputation* computation = module->entry_computation();
std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx;
if (module->config().hlo_profiling_enabled()) {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
index 963aced208..ebed7058d8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
@@ -116,7 +116,11 @@ class CpuCompiler : public LLVMCompiler {
// stream_execs)
using LLVMCompiler::Compile;
- StatusOr<std::unique_ptr<Executable>> Compile(
+ StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+ std::unique_ptr<HloModule> module,
+ perftools::gputools::StreamExecutor* stream_exec) override;
+
+ StatusOr<std::unique_ptr<Executable>> RunBackend(
std::unique_ptr<HloModule> module,
perftools::gputools::StreamExecutor* stream_exec) override;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 23fb308ec6..937d453a5c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -126,7 +126,7 @@ string GetLibdeviceDir(const string& config_cuda_data_dir) {
// Runs optimization passes on the given HLO module.
tensorflow::Status OptimizeHloModule(
- HloModule* hlo_module, const se::DeviceDescription& device_desc,
+ HloModule* hlo_module,
const HloCostAnalysis::ShapeSizeFunction& shape_size_function) {
{
HloPassPipeline pipeline("optimization");
@@ -297,19 +297,23 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
GpuCompiler::GpuCompiler()
: pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {}
-StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
- std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
- TF_RET_CHECK(stream_exec != nullptr);
-
+StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses(
+ std::unique_ptr<HloModule> module, se::StreamExecutor* /*stream_exec*/) {
{
Tracing::TraceMe annotation("HLO Transforms", module->name(),
/*is_expensive=*/true);
- TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(),
- stream_exec->GetDeviceDescription(),
- ShapeSizeBytesFunction()));
TF_RETURN_IF_ERROR(
- PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
+ OptimizeHloModule(module.get(), ShapeSizeBytesFunction()));
}
+ return std::move(module);
+}
+
+StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
+ std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
+ TF_RET_CHECK(stream_exec != nullptr);
+
+ TF_RETURN_IF_ERROR(
+ PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
llvm::LLVMContext llvm_context;
std::string buffer;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
index fe5fce615f..18e3434020 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
@@ -49,7 +49,11 @@ class GpuCompiler : public LLVMCompiler {
// stream_execs)
using LLVMCompiler::Compile;
- StatusOr<std::unique_ptr<Executable>> Compile(
+ StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+ std::unique_ptr<HloModule> module,
+ perftools::gputools::StreamExecutor* stream_exec) override;
+
+ StatusOr<std::unique_ptr<Executable>> RunBackend(
std::unique_ptr<HloModule> module,
perftools::gputools::StreamExecutor* stream_exec) override;
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index 63f2b1296e..6b6d48233a 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -114,11 +114,16 @@ HloRunner::~HloRunner() {
StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
std::unique_ptr<HloModule> module,
tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
- Shape* result_shape) {
+ Shape* result_shape, bool run_hlo_passes) {
+ if (run_hlo_passes) {
+ TF_ASSIGN_OR_RETURN(
+ module, backend().compiler()->RunHloPasses(
+ std::move(module), backend().default_stream_executor()));
+ }
TF_ASSIGN_OR_RETURN(
std::unique_ptr<Executable> executable,
- backend().compiler()->Compile(std::move(module),
- backend().default_stream_executor()));
+ backend().compiler()->RunBackend(std::move(module),
+ backend().default_stream_executor()));
se::Stream stream(backend().default_stream_executor());
stream.Init();
@@ -193,10 +198,12 @@ StatusOr<std::unique_ptr<Literal>> HloRunner::TransferFromDevice(
StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteAndTransfer(
std::unique_ptr<HloModule> module,
- tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+ tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
+ bool run_hlo_passes) {
Shape result_shape;
- TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase device_base,
- Execute(std::move(module), arguments, &result_shape));
+ TF_ASSIGN_OR_RETURN(
+ se::DeviceMemoryBase device_base,
+ Execute(std::move(module), arguments, &result_shape, run_hlo_passes));
return TransferFromDevice(result_shape, device_base);
}
diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h
index a5732848c6..95cddafc91 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.h
+++ b/tensorflow/compiler/xla/service/hlo_runner.h
@@ -65,17 +65,20 @@ class HloRunner {
// Executes the given module with given literals as input and returns the
// result as a Literal. The LiteralPtr type accepts Literal* or
// std::unique_ptr<Literal>.
+ // If run_hlo_passes is false, the module will be executed without running
+ // HLO optimization passes.
template <typename LiteralPtr>
StatusOr<std::unique_ptr<Literal>> Execute(
std::unique_ptr<HloModule> module,
- const tensorflow::gtl::ArraySlice<LiteralPtr> literals);
+ const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+ bool run_hlo_passes = true);
// Executes the given module and returns a global data handle.
StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
std::unique_ptr<HloModule> module,
tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
arguments,
- Shape* result_shape);
+ Shape* result_shape, bool run_hlo_passes = true);
// Transfers the given literal to the device and returns the data handle.
StatusOr<perftools::gputools::DeviceMemoryBase> TransferToDevice(
@@ -90,7 +93,8 @@ class HloRunner {
StatusOr<std::unique_ptr<Literal>> ExecuteAndTransfer(
std::unique_ptr<HloModule> module,
tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
- arguments);
+ arguments,
+ bool run_hlo_passes = true);
// If backend is not created in the constructor, creates and returns the
// default backend. If creation fails, crashes the program.
@@ -112,14 +116,15 @@ class HloRunner {
template <typename LiteralPtr>
StatusOr<std::unique_ptr<Literal>> HloRunner::Execute(
std::unique_ptr<HloModule> module,
- const tensorflow::gtl::ArraySlice<LiteralPtr> literals) {
+ const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+ bool run_hlo_passes) {
std::vector<perftools::gputools::DeviceMemoryBase> arguments;
for (const auto& literal : literals) {
TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument,
TransferToDevice(*literal));
arguments.push_back(argument);
}
- return ExecuteAndTransfer(std::move(module), arguments);
+ return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes);
}
} // namespace xla
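The defaulted run_hlo_passes argument keeps existing callers unchanged while letting a test execute a hand-built module verbatim. A sketch of both paths, assuming a runner, two modules, and a literals ArraySlice are already set up (all placeholders):

    // Sketch: run one module through the full pipeline and another with
    // HLO passes skipped. Setup of runner/modules/literals is elided.
    TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> optimized_result,
                        runner.Execute(std::move(module_a), literals));
    TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> verbatim_result,
                        runner.Execute(std::move(module_b), literals,
                                       /*run_hlo_passes=*/false));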
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 6d5796a24b..c9a5285a4f 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -69,11 +69,19 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) {
return pipeline.Run(hlo_module).status();
}
-StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::Compile(
+StatusOr<std::unique_ptr<HloModule>> InterpreterCompiler::RunHloPasses(
+ std::unique_ptr<HloModule> hlo_module,
+ se::StreamExecutor* /*stream_exec*/) {
+ VLOG(1) << "Run hlo passes on graph " << hlo_module->name();
+ TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
+ return std::move(hlo_module);
+}
+
+StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec) {
TF_RET_CHECK(stream_exec != nullptr);
- VLOG(1) << "Generate graph " << hlo_module->name();
+ VLOG(1) << "Run backend " << hlo_module->name();
TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h
index cfdc9b6256..278cf51842 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.h
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.h
@@ -43,8 +43,12 @@ class InterpreterCompiler : public Compiler {
InterpreterCompiler() {}
~InterpreterCompiler() override {}
- StatusOr<std::unique_ptr<Executable>> Compile(
- std::unique_ptr<HloModule> hlo_modules,
+ StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+ std::unique_ptr<HloModule> hlo_module,
+ perftools::gputools::StreamExecutor* stream_exec) override;
+
+ StatusOr<std::unique_ptr<Executable>> RunBackend(
+ std::unique_ptr<HloModule> hlo_module,
perftools::gputools::StreamExecutor* stream_exec) override;
StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc
index ba0304fb8c..34f3419269 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.cc
+++ b/tensorflow/compiler/xla/service/llvm_compiler.cc
@@ -27,8 +27,10 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
"Model partitioning not implemented for the CPU/GPU compilers!");
}
+ TF_ASSIGN_OR_RETURN(
+ modules[i], RunHloPasses(std::move(modules[i]), stream_execs[i][0]));
TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
- Compile(std::move(modules[i]), stream_execs[i][0]));
+ RunBackend(std::move(modules[i]), stream_execs[i][0]));
result.push_back(std::move(executable));
}
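The multi-module Compile overload now chains the two phases per module. A sketch of calling it, with placeholder modules and executors; each module gets exactly one executor, since model partitioning is rejected just above:

    // Sketch: compile two modules through the parallel entry point.
    std::vector<std::unique_ptr<HloModule>> modules;
    modules.push_back(std::move(module_a));
    modules.push_back(std::move(module_b));
    // One executor per module; larger inner vectors hit the
    // "Model partitioning not implemented" error path above.
    std::vector<std::vector<perftools::gputools::StreamExecutor*>>
        stream_execs = {{executor_a}, {executor_b}};
    TF_ASSIGN_OR_RETURN(
        std::vector<std::unique_ptr<Executable>> executables,
        compiler->Compile(std::move(modules), stream_execs));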
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h
index c4f689eabe..c5393cef4f 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.h
+++ b/tensorflow/compiler/xla/service/llvm_compiler.h
@@ -58,10 +58,14 @@ class LLVMCompiler : public Compiler {
void RemovePostOptimizationHook() { user_post_optimization_hook_ = nullptr; }
// Bring in
- // StatusOr<std::unique_ptr<Executable>> Compile(
- // std::unique_ptr<HloModule> module,
- // perftools::gputools::StreamExecutor* executor)
- using Compiler::Compile;
+ // StatusOr<std::unique_ptr<Executable>> RunBackend(
+ // std::unique_ptr<HloModule> module,
+ // perftools::gputools::StreamExecutor* stream_exec)
+ // StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+ // std::unique_ptr<HloModule> module,
+ // perftools::gputools::StreamExecutor* stream_exec)
+ using Compiler::RunBackend;
+ using Compiler::RunHloPasses;
StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
std::vector<std::unique_ptr<HloModule>> modules,
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index ee9501dd48..0544a1697b 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -431,8 +431,11 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
true));
TF_ASSIGN_OR_RETURN(
+ module, backend->compiler()->RunHloPasses(std::move(module), executor));
+
+ TF_ASSIGN_OR_RETURN(
std::unique_ptr<Executable> executable,
- backend->compiler()->Compile(std::move(module), executor));
+ backend->compiler()->RunBackend(std::move(module), executor));
if (!other_directory_path.empty()) {
executable->set_session_module(std::move(session_module));