about summary refs log tree commit diff homepage
path: root/tensorflow/compiler/xla/service/gpu/copy_thunk.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/copy_thunk.cc')
-rw-r--r-- tensorflow/compiler/xla/service/gpu/copy_thunk.cc | 9
1 file changed, 7 insertions, 2 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/copy_thunk.cc b/tensorflow/compiler/xla/service/gpu/copy_thunk.cc
index ee38c0318a..92e03f94c1 100644
--- a/tensorflow/compiler/xla/service/gpu/copy_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/copy_thunk.cc
@@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/copy_thunk.h"
+#include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
namespace xla {
@@ -30,9 +31,11 @@ HostToDeviceCopyThunk::HostToDeviceCopyThunk(
mem_size_(mem_size) {}
Status HostToDeviceCopyThunk::ExecuteOnStream(
- const BufferAllocations& buffer_allocations, se::Stream* stream) {
+ const BufferAllocations& buffer_allocations, se::Stream* stream,
+ HloExecutionProfiler* profiler) {
se::DeviceMemoryBase destination_data =
buffer_allocations.GetDeviceAddress(destination_buffer_);
+ auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction());
stream->ThenMemcpy(&destination_data, source_address_, mem_size_);
return Status::OK();
}
@@ -47,11 +50,13 @@ DeviceToDeviceCopyThunk::DeviceToDeviceCopyThunk(
mem_size_(mem_size) {}
Status DeviceToDeviceCopyThunk::ExecuteOnStream(
- const BufferAllocations& buffer_allocations, se::Stream* stream) {
+ const BufferAllocations& buffer_allocations, se::Stream* stream,
+ HloExecutionProfiler* profiler) {
se::DeviceMemoryBase destination_data =
buffer_allocations.GetDeviceAddress(destination_buffer_);
se::DeviceMemoryBase source_data =
buffer_allocations.GetDeviceAddress(source_buffer_);
+ auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction());
stream->ThenMemcpy(&destination_data, source_data, mem_size_);
return Status::OK();
}