diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/convolution_thunk.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/convolution_thunk.cc | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index f088112412..7833a4077e 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 
 #include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
+#include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -55,7 +56,8 @@ ConvolutionThunk::ConvolutionThunk(
       tensor_ops_enabled_(tensor_ops_enabled) {}
 
 Status ConvolutionThunk::ExecuteOnStream(
-    const BufferAllocations& buffer_allocations, se::Stream* stream) {
+    const BufferAllocations& buffer_allocations, se::Stream* stream,
+    HloExecutionProfiler* profiler) {
   se::DeviceMemoryBase input_data =
       buffer_allocations.GetDeviceAddress(input_buffer_);
   se::DeviceMemoryBase filter_data =
@@ -68,6 +70,7 @@ Status ConvolutionThunk::ExecuteOnStream(
   se::dnn::AlgorithmConfig algorithm_config(
       se::dnn::AlgorithmDesc(algorithm_, tensor_ops_enabled_));
 
+  auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction());
   TF_RETURN_IF_ERROR(RunCudnnConvolution(
       convolution_kind_, input_shape_, filter_shape_, output_shape_, input_data,
       filter_data, output_data, scratch, window_, dim_nums_, algorithm_config,