author     Anna R <annarev@google.com>                      2018-08-10 14:42:11 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>  2018-08-10 14:49:18 -0700
commit     0b36ff79021b907f5447bfcbaa060dbdc2114c67 (patch)
tree       b43e6f7ef6e074ce21e1c48881eaa270bef7591f /tensorflow/compiler
parent     430dd8d1c14cd665aafdcdee82c76ec304f51ef1 (diff)
Automated rollback of commit 9eb310c3f651d69b9a8eea016f6397049c5a0a31
PiperOrigin-RevId: 208270711
Diffstat (limited to 'tensorflow/compiler')
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_executable.cc | 38
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_executable.h  | 10
2 files changed, 20 insertions, 28 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index c376864c3e..946f5124b8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -249,11 +249,24 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteOnStream(
const ServiceExecutableRunOptions* run_options,
tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
HloExecutionProfile* hlo_execution_profile) {
+ if (GetRootPointsToSet().IsAmbiguous()) {
+ return Unimplemented("Points-to set of root instruction is ambiguous");
+ }
+
+ se::Stream* stream = run_options->stream();
+ DeviceMemoryAllocator* memory_allocator = run_options->allocator();
+
+ std::vector<OwningDeviceMemory> owning_buffers;
+ std::vector<se::DeviceMemoryBase> unowning_buffers;
TF_ASSIGN_OR_RETURN(
- auto result,
- ExecuteAsyncOnStreamImpl(run_options, arguments, hlo_execution_profile));
- TF_RETURN_IF_ERROR(run_options->stream()->BlockHostUntilDone());
- return std::move(result);
+ std::tie(unowning_buffers, owning_buffers),
+ CreateTempArray(memory_allocator, stream->parent()->device_ordinal(),
+ arguments));
+
+ TF_RETURN_IF_ERROR(ExecuteComputeFunction(
+ &run_options->run_options(), unowning_buffers, hlo_execution_profile));
+
+ return CreateResultShapedBuffer(run_options, &owning_buffers);
}
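
A note on the hunk above: CreateTempArray hands back both buffer vectors through a single StatusOr, and TF_ASSIGN_OR_RETURN unpacks it into two already-declared locals via std::tie. Below is a minimal self-contained sketch of that unpacking pattern, using hypothetical stand-in types rather than XLA's StatusOr/DeviceMemory classes:

#include <optional>
#include <tuple>
#include <utility>
#include <vector>

// Hypothetical stand-ins for illustration only; the real code uses
// xla::StatusOr, se::DeviceMemoryBase, and xla::OwningDeviceMemory.
using UnowningBuffer = int;
using OwningBuffer = double;
using BufferPair =
    std::pair<std::vector<UnowningBuffer>, std::vector<OwningBuffer>>;

// Stands in for CpuExecutable::CreateTempArray; std::optional plays the
// role of StatusOr here (empty == error status).
std::optional<BufferPair> CreateTempArraySketch() {
  return BufferPair{{1, 2}, {3.0}};
}

bool RunSketch() {
  std::vector<UnowningBuffer> unowning_buffers;
  std::vector<OwningBuffer> owning_buffers;
  // TF_ASSIGN_OR_RETURN expands to roughly this: evaluate the status-or
  // expression, bail out early on failure, then move the payload into the
  // destination. std::tie lets one macro invocation fill two locals.
  auto result = CreateTempArraySketch();
  if (!result) return false;
  std::tie(unowning_buffers, owning_buffers) = std::move(*result);
  return true;
}
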
StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream(
@@ -264,16 +277,6 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream(
"Asynchronous execution on stream with hlo profiling is not yet "
"supported on CPU.");
}
- return ExecuteAsyncOnStreamImpl(run_options, arguments, nullptr);
-}
-
-StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl(
- const ServiceExecutableRunOptions* run_options,
- tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
- HloExecutionProfile* hlo_execution_profile) {
- if (GetRootPointsToSet().IsAmbiguous()) {
- return Unimplemented("Points-to set of root instruction is ambiguous");
- }
auto* host_stream = dynamic_cast<se::host::HostStream*>(
run_options->stream()->implementation());
@@ -307,20 +310,19 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl(
ServiceExecutableRunOptions run_options;
std::vector<se::DeviceMemoryBase> unowning_buffers;
std::shared_ptr<std::vector<OwningDeviceMemory>> buffers;
- HloExecutionProfile* hlo_execution_profile;
void operator()() {
// Failing a CHECK here is not great, but I don't see an obvious way to
// return a failed Status asynchronously.
TF_CHECK_OK(executable->ExecuteComputeFunction(
- &run_options.run_options(), unowning_buffers, hlo_execution_profile));
+ &run_options.run_options(), unowning_buffers,
+ /*hlo_execution_profile=*/nullptr));
}
};
host_stream->EnqueueTask(
AsyncRunTask{this, *run_options, std::move(unowning_buffers),
std::make_shared<std::vector<OwningDeviceMemory>>(
- std::move(owning_buffers)),
- hlo_execution_profile});
+ std::move(owning_buffers))});
return std::move(result);
}
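
The AsyncRunTask hunk above also shows the ownership trick that keeps the async path safe after this rollback: the functor copied into the host stream's queue holds a shared_ptr to the owning buffers, so they outlive the call that enqueued them, while the profile pointer no longer needs to (it is now always nullptr on this path). A minimal sketch of that lifetime pattern, with a plain std::queue standing in for se::host::HostStream and hypothetical names throughout:

#include <cassert>
#include <functional>
#include <memory>
#include <queue>
#include <vector>

// Hypothetical stand-in for std::vector<OwningDeviceMemory>.
using OwningBuffers = std::vector<int>;

struct AsyncRunTaskSketch {
  // The shared_ptr keeps the temp buffers alive until the task runs,
  // even though the enqueuing function returned to its caller long ago.
  std::shared_ptr<OwningBuffers> buffers;
  void operator()() const {
    // The real task calls executable->ExecuteComputeFunction() and
    // CHECK-fails on error, since no Status can be returned from here.
    assert(!buffers->empty());
  }
};

int main() {
  // Stands in for the task queue inside se::host::HostStream.
  std::queue<std::function<void()>> host_stream;
  {
    auto owning = std::make_shared<OwningBuffers>(OwningBuffers{1, 2, 3});
    host_stream.push(AsyncRunTaskSketch{std::move(owning)});
    // `owning` leaves scope here; the queued copy keeps the buffers alive.
  }
  host_stream.front()();  // later: the stream's worker thread runs the task
  host_stream.pop();
  return 0;
}
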
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
index 96e53de57e..8af8a5dfec 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
@@ -85,16 +85,6 @@ class CpuExecutable : public Executable {
const BufferAssignment& buffer_assignment() const { return *assignment_; }
private:
- // This is for sharing the code between ExecuteOnStream and
- // ExecuteAsyncOnStream.
- //
- // Notice that it's tricky to use correctly, as the profile object (when it
- // exists) must out-live the task.
- StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStreamImpl(
- const ServiceExecutableRunOptions* run_options,
- tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
- HloExecutionProfile* hlo_execution_profile);
-
// Creates an array suitable for passing as the "temps" argument to the JIT
// compiled function pointer.
//