1 files changed, 168 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
new file mode 100644
index 0000000000..373ab79ab2
--- /dev/null
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -0,0 +1,168 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
+
+#include <memory>
+#include <utility>
+
+#include "tensorflow/compiler/xla/executable_run_options.h"
+#include "tensorflow/compiler/xla/service/computation_layout.h"
+#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
+#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/service/session.pb.h"
+#include "tensorflow/compiler/xla/service/shaped_buffer.h"
+#include "tensorflow/compiler/xla/service/versioned_computation_handle.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+
+namespace xla {
+
+// A given platform's compiler will produce an Executable -- this is a uniform
+// interface that is used for launching compiled programs across platforms.
+//
+// TODO(leary) will need to extend this to support multiple streams/devices as
+// we begin to compile single programs to run on multiple devices.
+class Executable {
+ public:
+  explicit Executable(std::unique_ptr<HloModule> hlo_module,
+                      std::unique_ptr<HloModuleConfig> module_config)
+      : hlo_module_(std::move(hlo_module)),
+        module_config_(std::move(module_config)) {}
+  virtual ~Executable() {}
+
+  // Enqueues the compilation result on the provided stream, passing the given
+  // arguments. This call is blocking and returns after the execution is done.
+  //
+  // If the hlo_execution_profile is provided as non-nullptr, profiling will be
+  // enabled.
+  //
+  // Returns the device memory region that a successful execution would
+  // populate.
+  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+          arguments,
+      HloExecutionProfile* hlo_execution_profile) = 0;
+
+  // Overload of ExecuteOnStream which returns and takes arguments as
+  // ShapedBuffers. Used for LocalService execution.
+  virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
+      HloExecutionProfile* hlo_execution_profile) = 0;
+
+  // Overload of which writes the result into a pre-allocated buffer
+  // (result_buffer).
+  virtual Status ExecuteOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
+      ShapedBuffer* result_buffer,
+      HloExecutionProfile* hlo_execution_profile) = 0;
+
+  // Same as ExecuteOnStream(), but this call is non-blocking and returns as
+  // soon as all of the operations are enqueued for launch on the stream.
+  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+      const ExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+          arguments) = 0;
+
+  // Same as ExecuteOnStream(), but runs this executable on multiple
+  // streams. arguments[i] contains the arguments to the execution on
+  // run_options[i]->stream() and the returned value is at index i of the
+  // returned vector.
+  virtual StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
+  ExecuteOnStreams(
+      tensorflow::gtl::ArraySlice<const ExecutableRunOptions> run_options,
+      tensorflow::gtl::ArraySlice<
+          tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
+          arguments);
+
+  // Returns the ExecutionProfile from executing on the device. This includes
+  // the number of cycles taken for the computation or the compilation time.
+  ExecutionProfile execution_profile() const {
+    tensorflow::mutex_lock lock(mutex_);
+    return execution_profile_;
+  }
+
+  // Returns whether this executable was compiled with HLO profilings support
+  // enabled. If not, the caller should not expect an hlo_execution_profile
+  // passed to ExecuteOnStream above to be populated during execution.
+  bool hlo_profiling_enabled() const {
+    return module_config_->hlo_profiling_enabled();
+  }
+
+  const HloModule& module() const { return *hlo_module_; }
+
+  const HloModuleConfig& module_config() const { return *module_config_; }
+
+  // Returns the versioned computation handle of the computation computed by
+  // this executable.
+  const VersionedComputationHandle& entry_computation_handle() const {
+    return hlo_module_->entry_computation_handle();
+  }
+
+  // The shape (including layout) that results from this execution. This is the
+  // shape of the DeviceMemoryBase result value in ExecuteOnStream above.
+  const Shape& result_shape() const {
+    return module_config_->entry_computation_layout().result_shape();
+  }
+
+  // Dumping helpers.
+  void set_session_module(std::unique_ptr<xla::SessionModule> session_module) {
+    session_module_ = std::move(session_module);
+  }
+  bool dumping() const { return session_module_ != nullptr; }
+  SessionModule* session_module() const { return session_module_.get(); }
+  Status DumpSessionModule();
+
+  // Dump session_module to directory_path/filename.
+  static Status DumpToDirectory(const string& directory_path,
+                                const string& filename,
+                                const SessionModule& session_module);
+
+ protected:
+  mutable tensorflow::mutex mutex_;
+
+  // Execution profile data on the device.
+  ExecutionProfile execution_profile_ GUARDED_BY(mutex_);
+
+  // HloModule this was compiled from. BufferAssignment keeps pointers to
+  // HloInstructions owned by the HloModule so we need to keep the HloModule
+  // around.
+  std::unique_ptr<HloModule> hlo_module_;
+
+  // The configuration used to build this executable (parameter layouts, result
+  // layout, profiling enabled, etc).
+  std::unique_ptr<HloModuleConfig> module_config_;
+
+  // SessionModule this was compiled from. Null if not dumping executions.
+  std::unique_ptr<SessionModule> session_module_;
+
+  // Execution count, used to generate a unique filename for each dumped
+  // execution.
+  int64 execution_count_ = 0;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_