diff options
Diffstat (limited to 'tensorflow/core/kernels/conv_grad_input_ops.cc')
-rw-r--r-- | tensorflow/core/kernels/conv_grad_input_ops.cc | 88 |
1 files changed, 63 insertions, 25 deletions
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 8bc79bebd9..e79c9465cb 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -131,7 +131,8 @@ struct LaunchXsmmBackwardInputConvolution { typename TTypes<T, 4>::ConstTensor kernel, typename TTypes<T, 4>::ConstTensor output_backward, int input_rows, int input_cols, int row_stride, - int col_stride, TensorFormat data_format) const { + int col_stride, int pad_h, int pad_w, + TensorFormat data_format) const { return false; } }; @@ -143,7 +144,8 @@ struct LaunchXsmmBackwardInputConvolution<CPUDevice, float> { typename TTypes<float, 4>::ConstTensor kernel, typename TTypes<float, 4>::ConstTensor output_backward, int input_rows, int input_cols, int row_stride, - int col_stride, TensorFormat data_format) const { + int col_stride, int pad_h, int pad_w, + TensorFormat data_format) const { auto batch = input_backward.dimension(0); auto in_depth = input_backward.dimension(3); auto out_depth = output_backward.dimension(3); @@ -162,10 +164,10 @@ struct LaunchXsmmBackwardInputConvolution<CPUDevice, float> { desc.S = filter_cols; desc.u = row_stride; desc.v = col_stride; - desc.pad_h = 0; - desc.pad_w = 0; - desc.pad_h_in = 0; // pad_rows; // ignored by libxsmm for now. - desc.pad_w_in = 0; // pad_cols; // ignored by libxsmm for now. + desc.pad_h = pad_h; + desc.pad_w = pad_w; + desc.pad_h_in = 0; + desc.pad_w_in = 0; desc.pad_h_out = 0; desc.pad_w_out = 0; desc.threads = num_threads; @@ -174,7 +176,7 @@ struct LaunchXsmmBackwardInputConvolution<CPUDevice, float> { desc.filter_format = LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM; // LIBXSMM_DNN_TENSOR_FORMAT_RSCK; desc.fuse_ops = LIBXSMM_DNN_CONV_FUSE_NONE; - desc.options = LIBXSMM_DNN_CONV_OPTION_NONE; + desc.options = LIBXSMM_DNN_CONV_OPTION_WU_EXT_FILTER_REDUCE; desc.datatype = LIBXSMM_DNN_DATATYPE_F32; auto input_ptr = input_backward.data(); @@ -236,13 +238,31 @@ class Conv2DFastBackpropInputOp : public OpKernel { context->allocate_output(0, input_shape, &in_backprop)); #if defined TENSORFLOW_USE_LIBXSMM && defined TENSORFLOW_USE_LIBXSMM_BACKWARD - if (LaunchXsmmBackwardInputConvolution<Device, T>()( - context, context->eigen_device<Device>(), - in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(), - out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size, - dims.spatial_dims[1].input_size, dims.spatial_dims[0].stride, - dims.spatial_dims[1].stride, data_format_)) { - return; + int64 pad_top, pad_bottom; + int64 pad_left, pad_right; + OP_REQUIRES_OK( + context, + GetWindowedOutputSizeVerbose( + dims.spatial_dims[0].input_size, dims.spatial_dims[0].filter_size, + dims.spatial_dims[0].stride, padding_, + &dims.spatial_dims[0].output_size, &pad_top, &pad_bottom)); + OP_REQUIRES_OK( + context, + GetWindowedOutputSizeVerbose( + dims.spatial_dims[1].input_size, dims.spatial_dims[1].filter_size, + dims.spatial_dims[1].stride, padding_, + &dims.spatial_dims[1].output_size, &pad_left, &pad_right)); + + if (pad_left == pad_right && pad_top == pad_bottom) { + if (LaunchXsmmBackwardInputConvolution<Device, T>()( + context, context->eigen_device<Device>(), + in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(), + out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size, + dims.spatial_dims[1].input_size, (int)dims.spatial_dims[0].stride, + (int)dims.spatial_dims[1].stride, (int)pad_top, (int)pad_left, + data_format_)) { + return; + } } #endif @@ -309,21 +329,39 @@ class Conv2DCustomBackpropInputOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_output(0, input_shape, &in_backprop)); +// TODO(andydavis) Consider moving code shared with +// Conv2DCustomBackpropFilterOp into a shared helper function. #if defined TENSORFLOW_USE_LIBXSMM && defined TENSORFLOW_USE_LIBXSMM_BACKWARD - if (LaunchXsmmBackwardInputConvolution<Device, T>()( - context, context->eigen_device<Device>(), - in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(), - out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size, - dims.spatial_dims[1].input_size, dims.spatial_dims[0].stride, - dims.spatial_dims[1].stride, data_format_)) { - return; - } -#endif + int64 pad_top, pad_bottom; + int64 pad_left, pad_right; + OP_REQUIRES_OK( + context, + GetWindowedOutputSizeVerbose( + dims.spatial_dims[0].input_size, dims.spatial_dims[0].filter_size, + dims.spatial_dims[0].stride, padding_, + &dims.spatial_dims[0].output_size, &pad_top, &pad_bottom)); + OP_REQUIRES_OK( + context, + GetWindowedOutputSizeVerbose( + dims.spatial_dims[1].input_size, dims.spatial_dims[1].filter_size, + dims.spatial_dims[1].stride, padding_, + &dims.spatial_dims[1].output_size, &pad_left, &pad_right)); - // TODO(andydavis) Consider moving code shared with - // Conv2DCustomBackpropFilterOp into a shared helper function. + if (pad_left == pad_right && pad_top == pad_bottom) { + if (LaunchXsmmBackwardInputConvolution<Device, T>()( + context, context->eigen_device<Device>(), + in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(), + out_backprop.tensor<T, 4>(), dims.spatial_dims[0].input_size, + dims.spatial_dims[1].input_size, (int)dims.spatial_dims[0].stride, + (int)dims.spatial_dims[1].stride, (int)pad_top, (int)pad_left, + data_format_)) { + return; + } + } +#else int64 pad_top, pad_bottom; int64 pad_left, pad_right; +#endif OP_REQUIRES_OK( context, GetWindowedOutputSizeVerbose( |