about summary refs log tree commit diff homepage
path: root/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/convolution_thunk.cc')
-rw-r--r-- tensorflow/compiler/xla/service/gpu/convolution_thunk.cc 5
1 file changed, 4 insertions, 1 deletion
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index f088112412..7833a4077e 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -18,6 +18,7 @@ limitations under the License.
#include <string>
#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
+#include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/lib/strings/strcat.h"
@@ -55,7 +56,8 @@ ConvolutionThunk::ConvolutionThunk(
tensor_ops_enabled_(tensor_ops_enabled) {}
Status ConvolutionThunk::ExecuteOnStream(
- const BufferAllocations& buffer_allocations, se::Stream* stream) {
+ const BufferAllocations& buffer_allocations, se::Stream* stream,
+ HloExecutionProfiler* profiler) {
se::DeviceMemoryBase input_data =
buffer_allocations.GetDeviceAddress(input_buffer_);
se::DeviceMemoryBase filter_data =
@@ -68,6 +70,7 @@ Status ConvolutionThunk::ExecuteOnStream(
se::dnn::AlgorithmConfig algorithm_config(
se::dnn::AlgorithmDesc(algorithm_, tensor_ops_enabled_));
+ auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction());
TF_RETURN_IF_ERROR(RunCudnnConvolution(
convolution_kind_, input_shape_, filter_shape_, output_shape_, input_data,
filter_data, output_data, scratch, window_, dim_nums_, algorithm_config,