author     Anna R <annarev@google.com>                      2018-08-10 14:42:11 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>  2018-08-10 14:49:18 -0700
commit     0b36ff79021b907f5447bfcbaa060dbdc2114c67 (patch)
tree       b43e6f7ef6e074ce21e1c48881eaa270bef7591f /tensorflow/compiler
parent     430dd8d1c14cd665aafdcdee82c76ec304f51ef1 (diff)
Automated rollback of commit 9eb310c3f651d69b9a8eea016f6397049c5a0a31
PiperOrigin-RevId: 208270711
Diffstat (limited to 'tensorflow/compiler')
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_executable.cc | 38
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_executable.h  | 10
2 files changed, 20 insertions, 28 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index c376864c3e..946f5124b8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -249,11 +249,24 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteOnStream(
const ServiceExecutableRunOptions* run_options,
tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
HloExecutionProfile* hlo_execution_profile) {
+ if (GetRootPointsToSet().IsAmbiguous()) {
+ return Unimplemented("Points-to set of root instruction is ambiguous");
+ }
+
+ se::Stream* stream = run_options->stream();
+ DeviceMemoryAllocator* memory_allocator = run_options->allocator();
+
+ std::vector<OwningDeviceMemory> owning_buffers;
+ std::vector<se::DeviceMemoryBase> unowning_buffers;
TF_ASSIGN_OR_RETURN(
- auto result,
- ExecuteAsyncOnStreamImpl(run_options, arguments, hlo_execution_profile));
- TF_RETURN_IF_ERROR(run_options->stream()->BlockHostUntilDone());
- return std::move(result);
+ std::tie(unowning_buffers, owning_buffers),
+ CreateTempArray(memory_allocator, stream->parent()->device_ordinal(),
+ arguments));
+
+ TF_RETURN_IF_ERROR(ExecuteComputeFunction(
+ &run_options->run_options(), unowning_buffers, hlo_execution_profile));
+
+ return CreateResultShapedBuffer(run_options, &owning_buffers);
}
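
A note on the hunk above: CreateTempArray hands back both buffer vectors through a single StatusOr, and TF_ASSIGN_OR_RETURN unpacks it into two already-declared locals via std::tie. Below is a minimal self-contained sketch of that unpacking pattern, using hypothetical stand-in types rather than XLA's StatusOr/DeviceMemory classes:

#include <optional>
#include <tuple>
#include <utility>
#include <vector>

// Hypothetical stand-ins for illustration only; the real code uses
// xla::StatusOr, se::DeviceMemoryBase, and xla::OwningDeviceMemory.
using UnowningBuffer = int;
using OwningBuffer = double;
using BufferPair =
    std::pair<std::vector<UnowningBuffer>, std::vector<OwningBuffer>>;

// Stands in for CpuExecutable::CreateTempArray; std::optional plays the
// role of StatusOr here (empty == error status).
std::optional<BufferPair> CreateTempArraySketch() {
  return BufferPair{{1, 2}, {3.0}};
}

bool RunSketch() {
  std::vector<UnowningBuffer> unowning_buffers;
  std::vector<OwningBuffer> owning_buffers;
  // TF_ASSIGN_OR_RETURN expands to roughly this: evaluate the status-or
  // expression, bail out early on failure, then move the payload into the
  // destination. std::tie lets one macro invocation fill two locals.
  auto result = CreateTempArraySketch();
  if (!result) return false;
  std::tie(unowning_buffers, owning_buffers) = std::move(*result);
  return true;
}
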
StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream(
@@ -264,16 +277,6 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream(
"Asynchronous execution on stream with hlo profiling is not yet "
"supported on CPU.");
}
- return ExecuteAsyncOnStreamImpl(run_options, arguments, nullptr);
-}
-
-StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl(
- const ServiceExecutableRunOptions* run_options,
- tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
- HloExecutionProfile* hlo_execution_profile) {
- if (GetRootPointsToSet().IsAmbiguous()) {
- return Unimplemented("Points-to set of root instruction is ambiguous");
- }
auto* host_stream = dynamic_cast<se::host::HostStream*>(
run_options->stream()->implementation());
@@ -307,20 +310,19 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl(
ServiceExecutableRunOptions run_options;
std::vector<se::DeviceMemoryBase> unowning_buffers;
std::shared_ptr<std::vector<OwningDeviceMemory>> buffers;
- HloExecutionProfile* hlo_execution_profile;
void operator()() {
// Failing a CHECK here is not great, but I don't see an obvious way to
// return a failed Status asynchronously.
TF_CHECK_OK(executable->ExecuteComputeFunction(
- &run_options.run_options(), unowning_buffers, hlo_execution_profile));
+ &run_options.run_options(), unowning_buffers,
+ /*hlo_execution_profile=*/nullptr));
}
};
host_stream->EnqueueTask(
AsyncRunTask{this, *run_options, std::move(unowning_buffers),
std::make_shared<std::vector<OwningDeviceMemory>>(
- std::move(owning_buffers)),
- hlo_execution_profile});
+ std::move(owning_buffers))});
return std::move(result);
}
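
The AsyncRunTask hunk above also shows the ownership trick that keeps the async path safe after this rollback: the functor copied into the host stream's queue holds a shared_ptr to the owning buffers, so they outlive the call that enqueued them, while the profile pointer no longer needs to (it is now always nullptr on this path). A minimal sketch of that lifetime pattern, with a plain std::queue standing in for se::host::HostStream and hypothetical names throughout:

#include <cassert>
#include <functional>
#include <memory>
#include <queue>
#include <vector>

// Hypothetical stand-in for std::vector<OwningDeviceMemory>.
using OwningBuffers = std::vector<int>;

struct AsyncRunTaskSketch {
  // The shared_ptr keeps the temp buffers alive until the task runs,
  // even though the enqueuing function returned to its caller long ago.
  std::shared_ptr<OwningBuffers> buffers;
  void operator()() const {
    // The real task calls executable->ExecuteComputeFunction() and
    // CHECK-fails on error, since no Status can be returned from here.
    assert(!buffers->empty());
  }
};

int main() {
  // Stands in for the task queue inside se::host::HostStream.
  std::queue<std::function<void()>> host_stream;
  {
    auto owning = std::make_shared<OwningBuffers>(OwningBuffers{1, 2, 3});
    host_stream.push(AsyncRunTaskSketch{std::move(owning)});
    // `owning` leaves scope here; the queued copy keeps the buffers alive.
  }
  host_stream.front()();  // later: the stream's worker thread runs the task
  host_stream.pop();
  return 0;
}
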
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
index 96e53de57e..8af8a5dfec 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
@@ -85,16 +85,6 @@ class CpuExecutable : public Executable {
const BufferAssignment& buffer_assignment() const { return *assignment_; }
private:
- // This is for sharing the code between ExecuteOnStream and
- // ExecuteAsyncOnStream.
- //
- // Notice that it's tricky to use correctly, as the profile object (when it
- // exists) must out-live the task.
- StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStreamImpl(
- const ServiceExecutableRunOptions* run_options,
- tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
- HloExecutionProfile* hlo_execution_profile);
-
// Creates an array suitable for passing as the "temps" argument to the JIT
// compiled function pointer.
//