author    Justin Lebar <jlebar@google.com>            2018-05-16 16:56:48 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2018-05-16 16:59:14 -0700
commit 6dfde69f2fa8825b0dd829dc10792543c124b230 (patch)
tree   c5bf0e19bcc99c4cbe748a891656c95af2c56343
parent 76728dbee8732054902cda929fb8821576b63509 (diff)
[XLA:GPU] Add op-tracing to XLA:GPU.
PiperOrigin-RevId: 196912575
 tensorflow/compiler/xla/service/gpu/BUILD             |  1 +
 tensorflow/compiler/xla/service/gpu/gpu_executable.cc | 25 +++++++++++++++++++
 2 files changed, 26 insertions(+), 0 deletions(-)
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 7ee039b3eb..4012f87f2b 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -291,6 +291,7 @@ cc_library(
"//tensorflow/compiler/xla/service:transfer_manager",
"//tensorflow/compiler/xla/service:tuple_points_to_analysis",
"//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
"//tensorflow/core:stream_executor_no_cuda",
"//tensorflow/core/platform/default/build_config:cublas_plugin",
"//tensorflow/core/platform/default/build_config:cudnn_plugin",
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index f8766474a8..25d8f720ea 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -32,12 +32,15 @@ limitations under the License.
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
namespace xla {
namespace gpu {
namespace {
+using tensorflow::tracing::ScopedAnnotation;
+
// A helper class for profiling HLO in the course of GPU program execution.
// All of the profiling is guarded internally, to avoid the caller needing to
// have lots of conditionals sprinkled around.
@@ -164,8 +167,30 @@ Status GpuExecutable::ExecuteThunks(
sub_streams, hlo_module_->entry_computation());
uint64 start_micros = tensorflow::Env::Default()->NowMicros();
+ // This top-level trace serves two purposes:
+ // 1) It marks the scope of the whole XLA module.
+ // 2) It tells us whether tracing is enabled. We use this to avoid the
+ // expensive HloInstruction::ToString() calls inside the loop below if
+ // tracing is disabled.
+ ScopedAnnotation top_level_annotation(hlo_module_->name(), "XLA GPU module");
+
std::map<const Thunk*, std::unique_ptr<se::Event>> thunk_to_finish_event;
for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
+ // Annotate execution of this op if tracing was enabled when we started
+ // running this module. If tracing is enabled *while* we're running the
+ // module, we won't get any data, but that's probably an OK trade-off.
+ //
+ // TODO(jlebar): Should we cache the results of HloInstruction::ToString(),
+ // since we expect it to be an expensive call?
+ tensorflow::gtl::optional<ScopedAnnotation> op_annotation;
+ if (top_level_annotation.IsEnabled()) {
+ op_annotation.emplace(
+ thunk->hlo_instruction() != nullptr
+ ? thunk->hlo_instruction()->ToString(HloPrintOptions::Canonical())
+ : "<unknown>",
+ "XLA op");
+ }
+
TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor));
int32 stream_no =
thunk_schedule_->StreamNumberForHlo(*thunk->hlo_instruction());
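
The core trick in this patch is that the top-level ScopedAnnotation doubles as an "is tracing on?" query, so the expensive per-op HloInstruction::ToString() call is only made when a trace will actually be recorded. Below is a minimal, self-contained C++17 sketch of that lazy-annotation pattern. Everything here is a hypothetical stand-in, not the real API: `Annotation` mimics tensorflow::tracing::ScopedAnnotation, `g_tracing_enabled` stands in for the platform's tracing toggle, and `ExpensiveOpDescription` plays the role of HloInstruction::ToString(HloPrintOptions::Canonical()).

    #include <atomic>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    // Stand-in for the platform's global tracing toggle.
    std::atomic<bool> g_tracing_enabled{false};

    // RAII annotation, analogous to ScopedAnnotation: emits begin/end
    // markers around a scope. Construction is cheap when tracing is off,
    // and the object remembers whether tracing was on at construction.
    class Annotation {
     public:
      Annotation(const std::string& name, const std::string& category)
          : enabled_(g_tracing_enabled.load()) {
        if (enabled_) {
          name_ = name;
          std::cout << "begin " << category << ": " << name_ << "\n";
        }
      }
      ~Annotation() {
        if (enabled_) std::cout << "end " << name_ << "\n";
      }
      bool IsEnabled() const { return enabled_; }

     private:
      bool enabled_;
      std::string name_;
    };

    // Stands in for the expensive HloInstruction::ToString() call the
    // patch's comments worry about.
    std::string ExpensiveOpDescription(int op) {
      return "op#" + std::to_string(op);
    }

    void RunOps(const std::vector<int>& ops) {
      // Top-level annotation marks the whole module *and* tells us whether
      // tracing was enabled when we started, mirroring the patch.
      Annotation top_level("module", "XLA GPU module");
      for (int op : ops) {
        // Per-op annotation is built lazily via optional, so the expensive
        // description string is never constructed when tracing is off.
        std::optional<Annotation> op_annotation;
        if (top_level.IsEnabled()) {
          op_annotation.emplace(ExpensiveOpDescription(op), "XLA op");
        }
        // ... execute the op's thunk here ...
      }  // op_annotation (if any) ends here, closing the op's trace scope.
    }

    int main() {
      g_tracing_enabled = true;
      RunOps({1, 2, 3});
    }

Note the trade-off called out in the patch's own comment: the enabled check is latched when the top-level annotation is constructed, so flipping tracing on mid-run records nothing until the next module execution.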