path: root/tensorflow/compiler/xla/service/executable.h
diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/executable.h')
1 files changed, 168 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
new file mode 100644
index 0000000000..373ab79ab2
--- /dev/null
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -0,0 +1,168 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
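+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/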
+#include <memory>
+#include <utility>
+#include "tensorflow/compiler/xla/executable_run_options.h"
+#include "tensorflow/compiler/xla/service/computation_layout.h"
+#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
+#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/service/session.pb.h"
+#include "tensorflow/compiler/xla/service/shaped_buffer.h"
+#include "tensorflow/compiler/xla/service/versioned_computation_handle.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+namespace xla {
+// A given platform's compiler will produce an Executable -- this is a uniform
+// interface that is used for launching compiled programs across platforms.
+// TODO(leary) will need to extend this to support multiple streams/devices as
+// we begin to compile single programs to run on multiple devices.
+class Executable {
+ public:
+  // Takes ownership of the HLO module this executable was compiled from and
+  // of the configuration it was built with. The accessors below dereference
+  // both pointers without a null check, so callers are expected to pass
+  // non-null values.
+  explicit Executable(std::unique_ptr<HloModule> hlo_module,
+                      std::unique_ptr<HloModuleConfig> module_config)
+      : hlo_module_(std::move(hlo_module)),
+        module_config_(std::move(module_config)) {}
+
+  // Virtual so that a derived executable can be destroyed through a pointer
+  // to this base class.
+  virtual ~Executable() = default;
+
+  // Enqueues the compilation result on the provided stream, passing the given
+  // arguments. This call is blocking and returns after the execution is done.
+  //
+  // If the hlo_execution_profile is provided as non-nullptr, profiling will be
+  // enabled.
+  //
+  // Returns the device memory region that a successful execution would
+  // populate.
+  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+          arguments,
+      HloExecutionProfile* hlo_execution_profile) = 0;
+
+  // Overload of ExecuteOnStream which returns and takes arguments as
+  // ShapedBuffers. Used for LocalService execution.
+  virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
+      HloExecutionProfile* hlo_execution_profile) = 0;
+
+  // Overload of ExecuteOnStream which writes the result into a pre-allocated
+  // buffer (result_buffer).
+  virtual Status ExecuteOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
+      ShapedBuffer* result_buffer,
+      HloExecutionProfile* hlo_execution_profile) = 0;
+
+  // Same as ExecuteOnStream(), but this call is non-blocking and returns as
+  // soon as all of the operations are enqueued for launch on the stream.
+  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+          arguments) = 0;
+
+  // Same as ExecuteOnStream(), but runs this executable on multiple
+  // streams. arguments[i] contains the arguments to the execution on
+  // run_options[i]->stream() and the returned value is at index i of the
+  // returned vector.
+  //
+  // Unlike the other Execute* methods this one is not pure virtual, so a
+  // definition must exist outside this class body.
+  // NOTE(review): presumably a default implementation lives in the matching
+  // .cc file -- confirm.
+  virtual StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
+  ExecuteOnStreams(
+      tensorflow::gtl::ArraySlice<const ExecutableRunOptions> run_options,
+      tensorflow::gtl::ArraySlice<
+          tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
+          arguments);
+
+  // Returns the ExecutionProfile from executing on the device. This includes
+  // the number of cycles taken for the computation or the compilation time.
+  // The profile is copied out while holding mutex_.
+  ExecutionProfile execution_profile() const {
+    tensorflow::mutex_lock lock(mutex_);
+    return execution_profile_;
+  }
+
+  // Returns whether this executable was compiled with HLO profiling support
+  // enabled. If not, the caller should not expect an hlo_execution_profile
+  // passed to ExecuteOnStream above to be populated during execution.
+  bool hlo_profiling_enabled() const {
+    return module_config_->hlo_profiling_enabled();
+  }
+
+  // Returns the HLO module this executable was constructed with.
+  const HloModule& module() const { return *hlo_module_; }
+
+  // Returns the module configuration this executable was constructed with.
+  const HloModuleConfig& module_config() const { return *module_config_; }
+
+  // Returns the versioned computation handle of the computation computed by
+  // this executable.
+  const VersionedComputationHandle& entry_computation_handle() const {
+    return hlo_module_->entry_computation_handle();
+  }
+
+  // The shape (including layout) that results from this execution. This is the
+  // shape of the DeviceMemoryBase result value in ExecuteOnStream above.
+  const Shape& result_shape() const {
+    return module_config_->entry_computation_layout().result_shape();
+  }
+
+  // Dumping helpers.
+
+  // Takes ownership of session_module, replacing any previously set one.
+  void set_session_module(std::unique_ptr<xla::SessionModule> session_module) {
+    session_module_ = std::move(session_module);
+  }
+
+  // True once a session module has been set via set_session_module().
+  bool dumping() const { return session_module_ != nullptr; }
+
+  // Returns the session module, or nullptr if none has been set. Ownership
+  // stays with this executable.
+  SessionModule* session_module() const { return session_module_.get(); }
+
+  // Defined out of line.
+  // NOTE(review): presumably dumps session_module_ and uses execution_count_
+  // for the filename -- confirm against the definition.
+  Status DumpSessionModule();
+
+  // Dump session_module to directory_path/filename.
+  static Status DumpToDirectory(const string& directory_path,
+                                const string& filename,
+                                const SessionModule& session_module);
+
+ protected:
+  // Mutable so that const accessors such as execution_profile() can lock it.
+  mutable tensorflow::mutex mutex_;
+
+  // Execution profile data on the device.
+  ExecutionProfile execution_profile_ GUARDED_BY(mutex_);
+
+  // HloModule this was compiled from. BufferAssignment keeps pointers to
+  // HloInstructions owned by the HloModule so we need to keep the HloModule
+  // around.
+  std::unique_ptr<HloModule> hlo_module_;
+
+  // The configuration used to build this executable (parameter layouts, result
+  // layout, profiling enabled, etc).
+  std::unique_ptr<HloModuleConfig> module_config_;
+
+  // SessionModule this was compiled from. Null if not dumping executions.
+  std::unique_ptr<SessionModule> session_module_;
+
+  // Execution count, used to generate a unique filename for each dumped
+  // execution.
+  // NOTE(review): not annotated GUARDED_BY(mutex_); confirm how updates are
+  // synchronized where it is modified.
+  int64 execution_count_ = 0;
+};
+} // namespace xla