aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/gpu_executable.cc')
-rw-r--r--tensorflow/compiler/xla/service/gpu/gpu_executable.cc5
1 files changed, 2 insertions, 3 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index f20a828bc1..0cad2958c7 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -136,18 +136,17 @@ Status GpuExecutable::ExecuteThunks(
TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDone());
}
- profiler.StartOperation();
VLOG(2) << "Executing the thunk for "
<< thunk->hlo_instruction()->ToString() << " on stream "
<< stream_no;
- TF_RETURN_IF_ERROR(thunk->ExecuteOnStream(buffer_allocations, stream));
+ TF_RETURN_IF_ERROR(
+ thunk->ExecuteOnStream(buffer_allocations, stream, &profiler));
if (thunk_schedule_->Depended(thunk)) {
auto finish_event = MakeUnique<se::Event>(main_stream->parent());
finish_event->Init();
stream->ThenRecordEvent(finish_event.get());
thunk_to_finish_event[thunk] = std::move(finish_event);
}
- profiler.FinishOperation(thunk->hlo_instruction());
}
main_stream->ThenWaitFor(&sub_streams);