Minor cleanup, now that there's a single Executable::ExecuteOnStream method.

PiperOrigin-RevId: 179630890
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-12-19 18:15:52 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-12-20 11:02:43 -0800
commit: 16257646af59a39ea04f16735d4f8a61b97230ed (patch)
tree: 82a9ca83728a5cebd2dff2066e5a9127355696c4 /tensorflow/compiler/xla/service/executable.h
parent: 1988732f81bc5f61cd97c20952d5359fc0bf627f (diff)
1 files changed, 3 insertions, 71 deletions
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 23864dda78..5ecfdffe21 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -96,13 +96,10 @@ class Executable {
 
   // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a
   // timer for the execution, sets up HLO profiling if enabled, and fills in the
-  // given ExecutionProfile if non-null.  The ExecuteOnStream overloads have
-  // different argument types and return types, so this method is templated on
-  // argument type and return type of the execute function.
-  template <typename ReturnT, typename ArgT>
-  StatusOr<ReturnT> ExecuteOnStreamWrapper(
+  // given ExecutionProfile if non-null.
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStreamWrapper(
       const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
-      const ArgT& arguments);
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments);
 
   // Returns the ExecutionProfile from executing on the device. This includes
   // the number of cycles taken for the computation or the compilation time.
@@ -186,71 +183,6 @@ class Executable {
   std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_;
 };
 
-template <typename ReturnT, typename ArgT>
-StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
-    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
-    const ArgT& arguments) {
-  perftools::gputools::Stream* stream = run_options->stream();
-  std::unique_ptr<perftools::gputools::Timer> timer;
-  if (profile != nullptr) {
-    timer.reset(new perftools::gputools::Timer(stream->parent()));
-    stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
-  }
-
-  VLOG(1) << "enqueueing executable on stream...";
-  // If the profiling flag isn't enabled, we pass nullptr as the profile to
-  // indicate profiling is not requested.
-  std::unique_ptr<HloExecutionProfile> profile_ptr =
-      module_config().debug_options().xla_hlo_profile() &&
-              hlo_profiling_enabled()
-          ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer(),
-                                            &hlo_profile_index_map())
-          : nullptr;
-
-  auto return_value =
-      ExecuteOnStream(run_options, arguments, profile_ptr.get());
-
-  if (profile != nullptr) {
-    VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
-    stream->ThenStopTimer(timer.get());
-    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
-    VLOG(1) << "done with block-host-until-done";
-
-    // Merge in run-time profile information from execution_profile.
-    profile->MergeFrom(execution_profile());
-
-    // Overall execution time (in nanoseconds) from the executor timer.
-    if (stream->ok()) {
-      // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
-      // illegal.
-      profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
-    }
-
-    // TODO(b/28123297): On GPU we end up including transfer time in
-    // the compute time this way. Instead, we should get the correct
-    // value by measuring it. Setting the field here at least lets
-    // benchmarks provide *some* value for GPU computations.
-    //
-    // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
-    // the compute time without the transfer time, so this way we get the
-    // correct compute time. We should instead have the correct value for
-    // compute_and_transfer_time and set compute_time to the compute time.
-    if (profile->compute_time_ns() == 0) {
-      profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
-    }
-  }
-
-  if (profile_ptr != nullptr) {
-    XLA_LOG_LINES(
-        tensorflow::INFO,
-        profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
-    hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute",
-                                         profile_ptr.get());
-  }
-
-  return return_value;
-}
-
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-12-19 18:15:52 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-12-20 11:02:43 -0800
commit	16257646af59a39ea04f16735d4f8a61b97230ed (patch)
tree	82a9ca83728a5cebd2dff2066e5a9127355696c4 /tensorflow/compiler/xla/service/executable.h
parent	1988732f81bc5f61cd97c20952d5359fc0bf627f (diff)