about summary refs log tree commit diff homepage
path: root/tensorflow/compiler/plugin/executor/executable.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/plugin/executor/executable.cc')
-rw-r--r--  tensorflow/compiler/plugin/executor/executable.cc  147
1 files changed, 147 insertions, 0 deletions
diff --git a/tensorflow/compiler/plugin/executor/executable.cc b/tensorflow/compiler/plugin/executor/executable.cc
new file mode 100644
index 0000000000..79eea9af3f
--- /dev/null
+++ b/tensorflow/compiler/plugin/executor/executable.cc
@@ -0,0 +1,147 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/plugin/executor/executable.h"
+#include "tensorflow/compiler/plugin/executor/executor.h"
+
+#include "tensorflow/compiler/xla/service/hlo_evaluator.h"
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+
+namespace se = ::perftools::gputools;
+namespace sep = ::perftools::gputools::executorplugin;
+
+namespace xla {
+namespace executorplugin {
+
+// Wraps an HloModule for execution on the example "executor" plugin backend.
+// ShapeSizeBytes (defined below) is handed to the base Executable so it can
+// size device buffers for this backend's pointer-based tuple layout.
+ExecutorExecutable::ExecutorExecutable(std::unique_ptr<HloModule> hlo_module)
+    : Executable(std::move(hlo_module), ShapeSizeBytes) {}
+
+ExecutorExecutable::~ExecutorExecutable() {}
+
+// Copies a non-tuple literal into freshly allocated executor memory and
+// returns the allocation wrapped as a DeviceMemoryBase.
+static se::DeviceMemoryBase AllocateSingleOutput(sep::ExecutorExecutor* executor,
+                                                 const Literal& literal) {
+  const int64 byte_size = xla::ShapeUtil::ByteSizeOf(literal.shape());
+  void* dest = executor->Allocate(byte_size);
+  memcpy(dest, literal.InternalData(), byte_size);
+  return se::DeviceMemoryBase(dest, byte_size);
+}
+
+// Allocates executor memory holding `literal`. Non-tuple literals become a
+// single flat buffer; tuples become a table of pointers, one per element,
+// each pointing at a separately allocated element buffer.
+static se::DeviceMemoryBase AllocateOutputBuffer(sep::ExecutorExecutor* executor,
+                                                 const Literal& literal) {
+  const Shape& shape = literal.shape();
+  if (shape.element_type() != xla::TUPLE) {
+    return AllocateSingleOutput(executor, literal);
+  } else {
+    int64 size(xla::ShapeUtil::ByteSizeOf(shape, sizeof(void*)));
+    void** buf = reinterpret_cast<void**>(executor->Allocate(size));
+    for (int64 n = 0; n < xla::ShapeUtil::TupleElementCount(shape); n++) {
+      se::DeviceMemoryBase out =
+          AllocateSingleOutput(executor, literal.tuple_literals(n));
+      // Index instead of advancing `buf`: the original `*buf++` walked the
+      // pointer past the table, and the advanced pointer was then returned.
+      buf[n] = out.opaque();
+    }
+
+    // Return the base of the pointer table, not a cursor past its end.
+    return se::DeviceMemoryBase(buf, size);
+  }
+}
+
+// Runs the module's entry computation on the host via HloEvaluator:
+// copies the device argument buffers into host literals, evaluates, then
+// copies the result back into newly allocated executor memory. Returns the
+// output buffer (for tuples, the base of a pointer table).
+StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteOnStream(
+    const ServiceExecutableRunOptions* run_options,
+    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
+    HloExecutionProfile* hlo_execution_profile) {
+  se::Stream* stream = run_options->stream();
+
+  VLOG(1) << "Execute " << module().name();
+  if (VLOG_IS_ON(2)) {
+    for (const auto& a : arguments) {
+      VLOG(2) << "-- argument " << a.opaque();
+    }
+  }
+
+  // Start of the profiled window; closed after output allocation below.
+  uint64 start_micros = tensorflow::Env::Default()->NowMicros();
+
+  HloComputation* computation = module().entry_computation();
+  if (computation->num_parameters() != arguments.size()) {
+    return tensorflow::errors::Internal(
+        "Mismatch between argument count and graph parameter count.");
+  }
+
+  // Create the arguments as a vector of XLA literals. `arg_literals` owns
+  // them; `arg_literals_ptrs` is the non-owning view the evaluator takes.
+  std::vector<std::unique_ptr<Literal>> arg_literals;
+  std::vector<Literal*> arg_literals_ptrs;
+  for (int64 p = 0; p < computation->num_parameters(); p++) {
+    // Create the input literal for the parameter
+    HloInstruction* param = computation->parameter_instruction(p);
+    arg_literals.emplace_back(Literal::CreateFromShape(param->shape()));
+    arg_literals_ptrs.push_back(arg_literals.back().get());
+
+    // Copy in the data from the stream_executor buffers. arguments[p] is
+    // assumed to hold at least ByteSizeOf(param->shape()) bytes.
+    void* buffer = arg_literals.back().get()->MutableInternalData();
+    memcpy(buffer, arguments[p].opaque(),
+           ShapeUtil::ByteSizeOf(param->shape()));
+  }
+
+  // Execute the graph using the evaluator
+  HloEvaluator evaluator;
+  std::unique_ptr<Literal> output;
+  TF_ASSIGN_OR_RETURN(output,
+                      evaluator.Evaluate(computation, arg_literals_ptrs));
+
+  // Copy the result into the return buffer. The plugin-specific executor is
+  // recovered from the stream's StreamExecutor implementation.
+  perftools::gputools::StreamExecutor* executor(stream->parent());
+  sep::ExecutorExecutor* executorExecutor(
+      static_cast<sep::ExecutorExecutor*>(executor->implementation()));
+
+  se::DeviceMemoryBase ret =
+      AllocateOutputBuffer(executorExecutor, *(output.get()));
+
+  uint64 end_micros = tensorflow::Env::Default()->NowMicros();
+
+  {
+    // Record compute time under the lock; clamp to >= 1ns so a zero-length
+    // measurement never reports as zero.
+    tensorflow::mutex_lock lock(mutex_);
+    const double nanoseconds = (end_micros - start_micros) * 1000.0;
+    execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
+  }
+
+  return ret;
+}
+
+// The ShapedBuffer overload is not implemented by this plugin backend;
+// callers must use the DeviceMemoryBase overload above.
+StatusOr<std::unique_ptr<ShapedBuffer>> ExecutorExecutable::ExecuteOnStream(
+    const ServiceExecutableRunOptions* run_options,
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
+    HloExecutionProfile* hlo_execution_profile) {
+  return tensorflow::errors::Unimplemented(
+      "ExecuteOnStream is not yet supported on Executor.");
+}
+
+// Asynchronous execution is not implemented; this backend runs synchronously
+// through the HloEvaluator in ExecuteOnStream.
+StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteAsyncOnStream(
+    const ServiceExecutableRunOptions* run_options,
+    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+  return tensorflow::errors::Unimplemented(
+      "ExecuteAsyncOnStream is not yet supported on Executor.");
+}
+
+// Size-on-device of a shape for this backend: opaque shapes occupy a single
+// pointer; everything else is its byte size with pointer-sized tuple slots.
+/*static*/ int64 ExecutorExecutable::ShapeSizeBytes(const Shape& shape) {
+  return ShapeUtil::IsOpaque(shape)
+             ? sizeof(void*)
+             : ShapeUtil::ByteSizeOf(shape, sizeof(void*));
+}
+
+
+} // namespace executorplugin
+} // namespace xla