path: root/tensorflow/core/kernels/conv_grad_input_ops.cc
Diffstat (limited to 'tensorflow/core/kernels/conv_grad_input_ops.cc')
-rw-r--r--  tensorflow/core/kernels/conv_grad_input_ops.cc   88
1 files changed, 63 insertions, 25 deletions
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index 8bc79bebd9..e79c9465cb 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -131,7 +131,8 @@ struct LaunchXsmmBackwardInputConvolution {
typename TTypes<T, 4>::ConstTensor kernel,
typename TTypes<T, 4>::ConstTensor output_backward,
int input_rows, int input_cols, int row_stride,
- int col_stride, TensorFormat data_format) const {
+ int col_stride, int pad_h, int pad_w,
+ TensorFormat data_format) const {
return false;
}
};
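
For context, this launcher uses a common TensorFlow kernel dispatch pattern: the primary template's operator() simply returns false ("no libxsmm path available"), and the <CPUDevice, float> specialization below does the real work; the calling op falls back to its generic implementation whenever the launcher returns false. A minimal, self-contained sketch of the pattern, with illustrative names that are not part of this patch:

#include <cstdio>

struct CPUDevice {};
struct GPUDevice {};

// Primary template: no fast path for this (Device, T) combination,
// so report failure and let the caller fall back.
template <typename Device, typename T>
struct LaunchFastPath {
  bool operator()() const { return false; }
};

// Specialization that actually implements the fast path.
template <>
struct LaunchFastPath<CPUDevice, float> {
  bool operator()() const {
    std::printf("fast path taken\n");
    return true;
  }
};

template <typename Device, typename T>
void Compute() {
  if (LaunchFastPath<Device, T>()()) return;  // fast path succeeded
  std::printf("generic fallback\n");
}

int main() {
  Compute<CPUDevice, float>();   // fast path taken
  Compute<GPUDevice, float>();   // generic fallback
  Compute<CPUDevice, double>();  // generic fallback
}
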
@@ -143,7 +144,8 @@ struct LaunchXsmmBackwardInputConvolution<CPUDevice, float> {
typename TTypes<float, 4>::ConstTensor kernel,
typename TTypes<float, 4>::ConstTensor output_backward,
int input_rows, int input_cols, int row_stride,
- int col_stride, TensorFormat data_format) const {
+ int col_stride, int pad_h, int pad_w,
+ TensorFormat data_format) const {
auto batch = input_backward.dimension(0);
auto in_depth = input_backward.dimension(3);
auto out_depth = output_backward.dimension(3);
@@ -162,10 +164,10 @@ struct LaunchXsmmBackwardInputConvolution<CPUDevice, float> {
desc.S = filter_cols;
desc.u = row_stride;
desc.v = col_stride;
- desc.pad_h = 0;
- desc.pad_w = 0;
- desc.pad_h_in = 0; // pad_rows; // ignored by libxsmm for now.
- desc.pad_w_in = 0; // pad_cols; // ignored by libxsmm for now.
+ desc.pad_h = pad_h;
+ desc.pad_w = pad_w;
+ desc.pad_h_in = 0;
+ desc.pad_w_in = 0;
desc.pad_h_out = 0;
desc.pad_w_out = 0;
desc.threads = num_threads;
@@ -174,7 +176,7 @@ struct LaunchXsmmBackwardInputConvolution<CPUDevice, float> {
desc.filter_format =
LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM; // LIBXSMM_DNN_TENSOR_FORMAT_RSCK;
desc.fuse_ops = LIBXSMM_DNN_CONV_FUSE_NONE;
- desc.options = LIBXSMM_DNN_CONV_OPTION_NONE;
+ desc.options = LIBXSMM_DNN_CONV_OPTION_WU_EXT_FILTER_REDUCE;
desc.datatype = LIBXSMM_DNN_DATATYPE_F32;
auto input_ptr = input_backward.data();
@@ -236,13 +238,31 @@ class Conv2DFastBackpropInputOp : public OpKernel {
context->allocate_output(0, input_shape, &in_backprop));
#if defined TENSORFLOW_USE_LIBXSMM && defined TENSORFLOW_USE_LIBXSMM_BACKWARD
- if (LaunchXsmmBackwardInputConvolution<Device, T>()(
- context, context->eigen_device<Device>(),
- in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(),
- out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size,
- dims.spatial_dims[1].input_size, dims.spatial_dims[0].stride,
- dims.spatial_dims[1].stride, data_format_)) {
- return;
+ int64 pad_top, pad_bottom;
+ int64 pad_left, pad_right;
+ OP_REQUIRES_OK(
+ context,
+ GetWindowedOutputSizeVerbose(
+ dims.spatial_dims[0].input_size, dims.spatial_dims[0].filter_size,
+ dims.spatial_dims[0].stride, padding_,
+ &dims.spatial_dims[0].output_size, &pad_top, &pad_bottom));
+ OP_REQUIRES_OK(
+ context,
+ GetWindowedOutputSizeVerbose(
+ dims.spatial_dims[1].input_size, dims.spatial_dims[1].filter_size,
+ dims.spatial_dims[1].stride, padding_,
+ &dims.spatial_dims[1].output_size, &pad_left, &pad_right));
+
+ if (pad_left == pad_right && pad_top == pad_bottom) {
+ if (LaunchXsmmBackwardInputConvolution<Device, T>()(
+ context, context->eigen_device<Device>(),
+ in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(),
+ out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size,
+ dims.spatial_dims[1].input_size, (int)dims.spatial_dims[0].stride,
+ (int)dims.spatial_dims[1].stride, (int)pad_top, (int)pad_left,
+ data_format_)) {
+ return;
+ }
}
#endif
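
Both call sites now gate the libxsmm path on symmetric padding (pad_left == pad_right && pad_top == pad_bottom), because the libxsmm descriptor takes a single pad_h/pad_w per spatial dimension, while TensorFlow's SAME padding can be asymmetric: any odd leftover pixel goes to the bottom/right edge. A minimal sketch of the SAME-padding arithmetic that GetWindowedOutputSizeVerbose performs (the helper name and layout below are illustrative; TensorFlow's actual helper also handles VALID padding and returns a Status):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Sketch of SAME padding: the output covers ceil(input / stride)
// positions, and the padding required to realize that is split across
// both edges, with any odd pixel assigned to pad_after (bottom/right).
// When pad_before != pad_after, the diff above skips the libxsmm path.
void SamePaddingVerbose(int64_t input, int64_t filter, int64_t stride,
                        int64_t* output, int64_t* pad_before,
                        int64_t* pad_after) {
  *output = (input + stride - 1) / stride;  // ceil(input / stride)
  const int64_t needed =
      std::max<int64_t>(0, (*output - 1) * stride + filter - input);
  *pad_before = needed / 2;
  *pad_after = needed - *pad_before;
}

int main() {
  int64_t out, before, after;
  SamePaddingVerbose(10, 4, 2, &out, &before, &after);
  std::printf("symmetric:  out=%lld pad=(%lld,%lld)\n",  // out=5 pad=(1,1)
              (long long)out, (long long)before, (long long)after);
  SamePaddingVerbose(10, 3, 2, &out, &before, &after);
  std::printf("asymmetric: out=%lld pad=(%lld,%lld)\n",  // out=5 pad=(0,1)
              (long long)out, (long long)before, (long long)after);
}

In the asymmetric case the guard fails, and both ops simply fall through to their existing non-xsmm code paths below.
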
@@ -309,21 +329,39 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
OP_REQUIRES_OK(context,
context->allocate_output(0, input_shape, &in_backprop));
+// TODO(andydavis) Consider moving code shared with
+// Conv2DCustomBackpropFilterOp into a shared helper function.
#if defined TENSORFLOW_USE_LIBXSMM && defined TENSORFLOW_USE_LIBXSMM_BACKWARD
- if (LaunchXsmmBackwardInputConvolution<Device, T>()(
- context, context->eigen_device<Device>(),
- in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(),
- out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size,
- dims.spatial_dims[1].input_size, dims.spatial_dims[0].stride,
- dims.spatial_dims[1].stride, data_format_)) {
- return;
- }
-#endif
+ int64 pad_top, pad_bottom;
+ int64 pad_left, pad_right;
+ OP_REQUIRES_OK(
+ context,
+ GetWindowedOutputSizeVerbose(
+ dims.spatial_dims[0].input_size, dims.spatial_dims[0].filter_size,
+ dims.spatial_dims[0].stride, padding_,
+ &dims.spatial_dims[0].output_size, &pad_top, &pad_bottom));
+ OP_REQUIRES_OK(
+ context,
+ GetWindowedOutputSizeVerbose(
+ dims.spatial_dims[1].input_size, dims.spatial_dims[1].filter_size,
+ dims.spatial_dims[1].stride, padding_,
+ &dims.spatial_dims[1].output_size, &pad_left, &pad_right));
- // TODO(andydavis) Consider moving code shared with
- // Conv2DCustomBackpropFilterOp into a shared helper function.
+ if (pad_left == pad_right && pad_top == pad_bottom) {
+ if (LaunchXsmmBackwardInputConvolution<Device, T>()(
+ context, context->eigen_device<Device>(),
+ in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(),
+ out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size,
+ dims.spatial_dims[1].input_size, (int)dims.spatial_dims[0].stride,
+ (int)dims.spatial_dims[1].stride, (int)pad_top, (int)pad_left,
+ data_format_)) {
+ return;
+ }
+ }
+#else
int64 pad_top, pad_bottom;
int64 pad_left, pad_right;
+#endif
OP_REQUIRES_OK(
context,
GetWindowedOutputSizeVerbose(