aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/executable.h
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-12-19 18:15:52 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-12-20 11:02:43 -0800
commit16257646af59a39ea04f16735d4f8a61b97230ed (patch)
tree82a9ca83728a5cebd2dff2066e5a9127355696c4 /tensorflow/compiler/xla/service/executable.h
parent1988732f81bc5f61cd97c20952d5359fc0bf627f (diff)
Minor cleanup, now that there's a single Executable::ExecuteOnStream method.
PiperOrigin-RevId: 179630890
Diffstat (limited to 'tensorflow/compiler/xla/service/executable.h')
-rw-r--r--tensorflow/compiler/xla/service/executable.h74
1 files changed, 3 insertions, 71 deletions
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 23864dda78..5ecfdffe21 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -96,13 +96,10 @@ class Executable {
// Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a
// timer for the execution, sets up HLO profiling if enabled, and fills in the
- // given ExecutionProfile if non-null. The ExecuteOnStream overloads have
- // different argument types and return types, so this method is templated on
- // argument type and return type of the execute function.
- template <typename ReturnT, typename ArgT>
- StatusOr<ReturnT> ExecuteOnStreamWrapper(
+ // given ExecutionProfile if non-null.
+ StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStreamWrapper(
const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
- const ArgT& arguments);
+ tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments);
// Returns the ExecutionProfile from executing on the device. This includes
// the number of cycles taken for the computation or the compilation time.
@@ -186,71 +183,6 @@ class Executable {
std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_;
};
-template <typename ReturnT, typename ArgT>
-StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
- const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
- const ArgT& arguments) {
- perftools::gputools::Stream* stream = run_options->stream();
- std::unique_ptr<perftools::gputools::Timer> timer;
- if (profile != nullptr) {
- timer.reset(new perftools::gputools::Timer(stream->parent()));
- stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
- }
-
- VLOG(1) << "enqueueing executable on stream...";
- // If the profiling flag isn't enabled, we pass nullptr as the profile to
- // indicate profiling is not requested.
- std::unique_ptr<HloExecutionProfile> profile_ptr =
- module_config().debug_options().xla_hlo_profile() &&
- hlo_profiling_enabled()
- ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer(),
- &hlo_profile_index_map())
- : nullptr;
-
- auto return_value =
- ExecuteOnStream(run_options, arguments, profile_ptr.get());
-
- if (profile != nullptr) {
- VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
- stream->ThenStopTimer(timer.get());
- TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
- VLOG(1) << "done with block-host-until-done";
-
- // Merge in run-time profile information from execution_profile.
- profile->MergeFrom(execution_profile());
-
- // Overall execution time (in nanoseconds) from the executor timer.
- if (stream->ok()) {
- // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
- // illegal.
- profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
- }
-
- // TODO(b/28123297): On GPU we end up including transfer time in
- // the compute time this way. Instead, we should get the correct
- // value by measuring it. Setting the field here at least lets
- // benchmarks provide *some* value for GPU computations.
- //
- // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
- // the compute time without the transfer time, so this way we get the
- // correct compute time. We should instead have the correct value for
- // compute_and_transfer_time and set compute_time to the compute time.
- if (profile->compute_time_ns() == 0) {
- profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
- }
- }
-
- if (profile_ptr != nullptr) {
- XLA_LOG_LINES(
- tensorflow::INFO,
- profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
- hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute",
- profile_ptr.get());
- }
-
- return return_value;
-}
-
} // namespace xla
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_