diff options
author | 2018-05-16 16:56:48 -0700
---|---|---|
committer | 2018-05-16 16:59:14 -0700
commit | 6dfde69f2fa8825b0dd829dc10792543c124b230 (patch)
tree | c5bf0e19bcc99c4cbe748a891656c95af2c56343
parent | 76728dbee8732054902cda929fb8821576b63509 (diff)
[XLA:GPU] Add op-tracing to XLA:GPU.
PiperOrigin-RevId: 196912575
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/BUILD | 1
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/gpu_executable.cc | 25
2 files changed, 26 insertions(+), 0 deletions(-)
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 7ee039b3eb..4012f87f2b 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -291,6 +291,7 @@ cc_library( "//tensorflow/compiler/xla/service:transfer_manager", "//tensorflow/compiler/xla/service:tuple_points_to_analysis", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core/platform/default/build_config:cublas_plugin", "//tensorflow/core/platform/default/build_config:cudnn_plugin", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index f8766474a8..25d8f720ea 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -32,12 +32,15 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" namespace xla { namespace gpu { namespace { +using tensorflow::tracing::ScopedAnnotation; + // A helper class for profiling HLO in the course of GPU program execution. // All of the profiling is guarded internally, to avoid the caller needing to // have lots of conditionals sprinkled around. @@ -164,8 +167,30 @@ Status GpuExecutable::ExecuteThunks( sub_streams, hlo_module_->entry_computation()); uint64 start_micros = tensorflow::Env::Default()->NowMicros(); + // This top-level trace serves two purposes: + // 1) It marks the scope of the whole XLA module. + // 2) It tells us whether tracing is enabled. We use this to avoid the + // expensive HloInstruction::ToString() calls inside the loop below if + // tracing is disabled. 
+ ScopedAnnotation top_level_annotation(hlo_module_->name(), "XLA GPU module"); + std::map<const Thunk*, std::unique_ptr<se::Event>> thunk_to_finish_event; for (Thunk* thunk : thunk_schedule_->TotalOrder()) { + // Annotate execution of this op if tracing was enabled when we started + // running this module. If tracing is enabled *while* we're running the + // module, we won't get any data, but that's probably an OK trade-off. + // + // TODO(jlebar): Should we cache the results of HloInstruction::ToString(), + // since we expect it to be an expensive call? + tensorflow::gtl::optional<ScopedAnnotation> op_annotation; + if (top_level_annotation.IsEnabled()) { + op_annotation.emplace( + thunk->hlo_instruction() != nullptr + ? thunk->hlo_instruction()->ToString(HloPrintOptions::Canonical()) + : "<unknown>", + "XLA op"); + } + TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor)); int32 stream_no = thunk_schedule_->StreamNumberForHlo(*thunk->hlo_instruction());