| author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-12-19 18:15:52 -0800 |
| --- | --- | --- |
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-12-20 11:02:43 -0800 |
| commit | 16257646af59a39ea04f16735d4f8a61b97230ed (patch) | |
| tree | 82a9ca83728a5cebd2dff2066e5a9127355696c4 /tensorflow/compiler/xla/service/executable.h | |
| parent | 1988732f81bc5f61cd97c20952d5359fc0bf627f (diff) | |
Minor cleanup, now that there's a single Executable::ExecuteOnStream method.
PiperOrigin-RevId: 179630890
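Since the wrapper now has a single signature, callers no longer need to supply template arguments. Below is a minimal sketch of a post-change call site, assuming an already-constructed `Executable`, `ServiceExecutableRunOptions`, and argument buffers; the helper name `RunWithProfile` and its surroundings are illustrative, not part of this commit, while the types and the `ExecuteOnStreamWrapper` signature come from the diff below.

```cpp
// Sketch only: hypothetical call site for the non-templated wrapper.
#include <memory>

#include "tensorflow/compiler/xla/service/executable.h"

namespace xla {

StatusOr<std::unique_ptr<ShapedBuffer>> RunWithProfile(
    Executable* executable, const ServiceExecutableRunOptions* run_options,
    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
    ExecutionProfile* profile) {
  // One call works for every backend; previously the caller had to
  // instantiate the wrapper with explicit return and argument types.
  return executable->ExecuteOnStreamWrapper(run_options, profile, arguments);
}

}  // namespace xla
```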
Diffstat (limited to 'tensorflow/compiler/xla/service/executable.h')
-rw-r--r-- | tensorflow/compiler/xla/service/executable.h | 74 |
1 file changed, 3 insertions, 71 deletions
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 23864dda78..5ecfdffe21 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -96,13 +96,10 @@ class Executable {
 
   // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a
   // timer for the execution, sets up HLO profiling if enabled, and fills in the
-  // given ExecutionProfile if non-null. The ExecuteOnStream overloads have
-  // different argument types and return types, so this method is templated on
-  // argument type and return type of the execute function.
-  template <typename ReturnT, typename ArgT>
-  StatusOr<ReturnT> ExecuteOnStreamWrapper(
+  // given ExecutionProfile if non-null.
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStreamWrapper(
       const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
-      const ArgT& arguments);
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments);
 
   // Returns the ExecutionProfile from executing on the device. This includes
   // the number of cycles taken for the computation or the compilation time.
@@ -186,71 +183,6 @@ class Executable {
   std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_;
 };
 
-template <typename ReturnT, typename ArgT>
-StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
-    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
-    const ArgT& arguments) {
-  perftools::gputools::Stream* stream = run_options->stream();
-  std::unique_ptr<perftools::gputools::Timer> timer;
-  if (profile != nullptr) {
-    timer.reset(new perftools::gputools::Timer(stream->parent()));
-    stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
-  }
-
-  VLOG(1) << "enqueueing executable on stream...";
-  // If the profiling flag isn't enabled, we pass nullptr as the profile to
-  // indicate profiling is not requested.
-  std::unique_ptr<HloExecutionProfile> profile_ptr =
-      module_config().debug_options().xla_hlo_profile() &&
-              hlo_profiling_enabled()
-          ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer(),
-                                            &hlo_profile_index_map())
-          : nullptr;
-
-  auto return_value =
-      ExecuteOnStream(run_options, arguments, profile_ptr.get());
-
-  if (profile != nullptr) {
-    VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
-    stream->ThenStopTimer(timer.get());
-    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
-    VLOG(1) << "done with block-host-until-done";
-
-    // Merge in run-time profile information from execution_profile.
-    profile->MergeFrom(execution_profile());
-
-    // Overall execution time (in nanoseconds) from the executor timer.
-    if (stream->ok()) {
-      // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
-      // illegal.
-      profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
-    }
-
-    // TODO(b/28123297): On GPU we end up including transfer time in
-    // the compute time this way. Instead, we should get the correct
-    // value by measuring it. Setting the field here at least lets
-    // benchmarks provide *some* value for GPU computations.
-    //
-    // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
-    // the compute time without the transfer time, so this way we get the
-    // correct compute time. We should instead have the correct value for
-    // compute_and_transfer_time and set compute_time to the compute time.
-    if (profile->compute_time_ns() == 0) {
-      profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
-    }
-  }
-
-  if (profile_ptr != nullptr) {
-    XLA_LOG_LINES(
-        tensorflow::INFO,
-        profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
-    hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute",
-                                         profile_ptr.get());
-  }
-
-  return return_value;
-}
-
 } // namespace xla
 
 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
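The header diff only removes the inline template; the non-templated definition is presumably relocated to executable.cc, which lies outside this diffstat. A rough sketch under that assumption, abbreviating the HLO-profiling and logging details that appear in the deleted template body above:

```cpp
// Sketch only: assumes the deleted template body moves, largely unchanged,
// into executable.cc as a regular member function with the concrete types
// from the new declaration. HLO profiling and VLOG/dump calls are omitted
// here; the full logic is visible in the removed lines above.
StatusOr<std::unique_ptr<ShapedBuffer>> Executable::ExecuteOnStreamWrapper(
    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
  perftools::gputools::Stream* stream = run_options->stream();
  std::unique_ptr<perftools::gputools::Timer> timer;
  if (profile != nullptr) {
    // Time the enqueued work so compute_and_transfer_time_ns can be filled in.
    timer.reset(new perftools::gputools::Timer(stream->parent()));
    stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
  }

  // Pass nullptr to indicate HLO profiling is not requested in this sketch.
  auto return_value = ExecuteOnStream(run_options, arguments, nullptr);

  if (profile != nullptr) {
    stream->ThenStopTimer(timer.get());
    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
    // Merge in run-time profile information from execution_profile.
    profile->MergeFrom(execution_profile());
    if (stream->ok()) {
      profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
    }
  }
  return return_value;
}
```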