diff options
author | Anna R <annarev@google.com> | 2018-08-10 14:42:11 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-08-10 14:49:18 -0700 |
commit | 0b36ff79021b907f5447bfcbaa060dbdc2114c67 (patch) | |
tree | b43e6f7ef6e074ce21e1c48881eaa270bef7591f /tensorflow/compiler | |
parent | 430dd8d1c14cd665aafdcdee82c76ec304f51ef1 (diff) |
Automated rollback of commit 9eb310c3f651d69b9a8eea016f6397049c5a0a31
PiperOrigin-RevId: 208270711
Diffstat (limited to 'tensorflow/compiler')
-rw-r--r-- | tensorflow/compiler/xla/service/cpu/cpu_executable.cc | 38 | ||||
-rw-r--r-- | tensorflow/compiler/xla/service/cpu/cpu_executable.h | 10 |
2 files changed, 20 insertions, 28 deletions
```diff
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index c376864c3e..946f5124b8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -249,11 +249,24 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     HloExecutionProfile* hlo_execution_profile) {
+  if (GetRootPointsToSet().IsAmbiguous()) {
+    return Unimplemented("Points-to set of root instruction is ambiguous");
+  }
+
+  se::Stream* stream = run_options->stream();
+  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
+
+  std::vector<OwningDeviceMemory> owning_buffers;
+  std::vector<se::DeviceMemoryBase> unowning_buffers;
   TF_ASSIGN_OR_RETURN(
-      auto result,
-      ExecuteAsyncOnStreamImpl(run_options, arguments, hlo_execution_profile));
-  TF_RETURN_IF_ERROR(run_options->stream()->BlockHostUntilDone());
-  return std::move(result);
+      std::tie(unowning_buffers, owning_buffers),
+      CreateTempArray(memory_allocator, stream->parent()->device_ordinal(),
+                      arguments));
+
+  TF_RETURN_IF_ERROR(ExecuteComputeFunction(
+      &run_options->run_options(), unowning_buffers, hlo_execution_profile));
+
+  return CreateResultShapedBuffer(run_options, &owning_buffers);
 }
 
 StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream(
@@ -264,16 +277,6 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream(
         "Asynchronous execution on stream with hlo profiling is not yet "
         "supported on CPU.");
   }
-  return ExecuteAsyncOnStreamImpl(run_options, arguments, nullptr);
-}
-
-StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
-  if (GetRootPointsToSet().IsAmbiguous()) {
-    return Unimplemented("Points-to set of root instruction is ambiguous");
-  }
 
   auto* host_stream = dynamic_cast<se::host::HostStream*>(
       run_options->stream()->implementation());
@@ -307,20 +310,19 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl(
     ServiceExecutableRunOptions run_options;
     std::vector<se::DeviceMemoryBase> unowning_buffers;
     std::shared_ptr<std::vector<OwningDeviceMemory>> buffers;
-    HloExecutionProfile* hlo_execution_profile;
 
     void operator()() {
       // Failing a CHECK here is not great, but I don't see an obvious way to
       // return a failed Status asynchronously.
       TF_CHECK_OK(executable->ExecuteComputeFunction(
-          &run_options.run_options(), unowning_buffers, hlo_execution_profile));
+          &run_options.run_options(), unowning_buffers,
+          /*hlo_execution_profile=*/nullptr));
     }
   };
   host_stream->EnqueueTask(
       AsyncRunTask{this, *run_options, std::move(unowning_buffers),
                    std::make_shared<std::vector<OwningDeviceMemory>>(
-                       std::move(owning_buffers)),
-                   hlo_execution_profile});
+                       std::move(owning_buffers))});
 
   return std::move(result);
 }
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
index 96e53de57e..8af8a5dfec 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
@@ -85,16 +85,6 @@ class CpuExecutable : public Executable {
   const BufferAssignment& buffer_assignment() const { return *assignment_; }
 
  private:
-  // This is for sharing the code between ExecuteOnStream and
-  // ExecuteAsyncOnStream.
-  //
-  // Notice that it's tricky to use correctly, as the profile object (when it
-  // exists) must out-live the task.
-  StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStreamImpl(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
-      HloExecutionProfile* hlo_execution_profile);
-
   // Creates an array suitable for passing as the "temps" argument to the JIT
   // compiled function pointer.
   //
```