path: root/tensorflow/contrib/fused_conv
author    Jingyue Wu <jingyue@google.com>    2017-12-21 11:21:22 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>    2017-12-21 11:24:59 -0800
commit    c7a05f4b18df0a9bd6b594d6f3d67b7489fcb54e (patch)
tree      95c67d661eb72bbcd82d5180de6d41079212e06e /tensorflow/contrib/fused_conv
parent    741a94013c4c9319b30534e3c40bdee3d71bd0bd (diff)
Fix padding for int8 fused convolution.
cudnnConvolutionBiasActivationForward doesn't work when the filter size is >= 6 and there is padding. This CL works around this cuDNN bug.

PiperOrigin-RevId: 179836819
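In short, the workaround splits the total SAME padding into an even part that cuDNN applies symmetrically and an extra part that FusedConv pads into the input tensor itself before the cuDNN call; on cuDNN older than 7, the buggy int8x4 case with filter size >= 6 has all of its padding applied manually. Below is a minimal standalone sketch of that adjustment logic, mirroring the AdjustPaddingForCudnn helper added in the patch; the runtime hits_cudnn_bug flag stands in for the patch's compile-time CUDNN_VERSION guard, and main() is only a hypothetical driver.

    #include <cstdio>

    // Splits `padding` into the symmetric part handed to cuDNN and the extra
    // part FusedConv must apply itself before the cuDNN call.
    void AdjustPaddingForCudnn(int padding, bool is_int8x4, int filter_size,
                               int* adjusted_padding, int* extra_padding_before,
                               int* extra_padding_after) {
      // Stand-in for `#if CUDNN_VERSION < 7000` in the real patch.
      const bool hits_cudnn_bug = is_int8x4 && filter_size >= 6;
      if (hits_cudnn_bug) {
        // Buggy case: give cuDNN no padding at all and pad manually on both
        // sides of the input.
        *adjusted_padding = 0;
        *extra_padding_before = padding / 2;
        *extra_padding_after = padding - *extra_padding_before;
        return;
      }
      // Normal case: cuDNN gets the largest even padding (applied as
      // `padding / 2` per side); the single leftover row/column of an odd
      // total is padded manually after the input.
      *adjusted_padding = padding / 2 * 2;
      *extra_padding_before = 0;
      *extra_padding_after = padding % 2;
    }

    int main() {
      int adjusted, before, after;
      AdjustPaddingForCudnn(/*padding=*/4, /*is_int8x4=*/true, /*filter_size=*/6,
                            &adjusted, &before, &after);
      printf("buggy case:  cudnn=%d before=%d after=%d\n", adjusted, before, after);
      // -> cudnn=0 before=2 after=2
      AdjustPaddingForCudnn(/*padding=*/3, /*is_int8x4=*/false, /*filter_size=*/3,
                            &adjusted, &before, &after);
      printf("normal case: cudnn=%d before=%d after=%d\n", adjusted, before, after);
      // -> cudnn=2 before=0 after=1
      return 0;
    }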
Diffstat (limited to 'tensorflow/contrib/fused_conv')
-rw-r--r--  tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc       | 49
-rw-r--r--  tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py | 30
2 files changed, 72 insertions, 7 deletions
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index dc9e5558ed..0e06575d96 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -34,6 +34,7 @@ limitations under the License.
#include "tensorflow/core/util/use_cudnn.h"
#if GOOGLE_CUDA
+#include "cuda/include/cudnn.h"
#include "tensorflow/core/kernels/conv_ops_gpu.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/util/activation_mode.h"
@@ -278,6 +279,28 @@ Status TransformNHWCToNCHW(OpKernelContext* ctx, const Tensor& nhwc_tensor,
  return Status::OK();
}
+// Adjusts padding so cudnn supports it. Sets `adjusted_padding` to be the
+// adjusted padding, and `extra_padding_before` and `extra_padding_after` to be
+// the extra padding that FusedConv needs to apply before calling cudnn.
+void AdjustPaddingForCudnn(int padding, bool is_int8x4, int filter_size,
+                           int* adjusted_padding, int* extra_padding_before,
+                           int* extra_padding_after) {
+#if CUDNN_VERSION < 7000
+  if (is_int8x4 && filter_size >= 6) {
+    // TODO(b/70795525): Remove after NVIDIA fixes this bug with int8 fused
+    // convolution. I don't know whether cuDNN7 still has the bug, so this
+    // workaround is enabled for cuDNN6 or older.
+    *adjusted_padding = 0;
+    *extra_padding_before = padding / 2;
+    *extra_padding_after = padding - *extra_padding_before;
+    return;
+  }
+#endif
+  *adjusted_padding = padding / 2 * 2;
+  *extra_padding_before = 0;
+  *extra_padding_after = padding % 2;
+}
+
template <typename T, typename BiasType, typename ScaleType>
void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
launch(OpKernelContext* ctx, bool cudnn_use_autotune,
@@ -338,12 +361,21 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
        0, (output_rows - 1) * row_stride + filter_rows - conv_input_rows);
    padding_cols = std::max<int>(
        0, (output_cols - 1) * col_stride + filter_cols - conv_input_cols);
-    const int padding_rows_parity = padding_rows & 1;
-    const int padding_cols_parity = padding_cols & 1;
-    if ((padding_rows_parity | padding_cols_parity) != 0) {
+    int extra_top_padding = 0;
+    int extra_bottom_padding = 0;
+    int extra_left_padding = 0;
+    int extra_right_padding = 0;
+    AdjustPaddingForCudnn(padding_rows, is_int8x4, filter_rows, &padding_rows,
+                          &extra_top_padding, &extra_bottom_padding);
+    AdjustPaddingForCudnn(padding_cols, is_int8x4, filter_cols, &padding_cols,
+                          &extra_left_padding, &extra_right_padding);
+    if (extra_top_padding != 0 || extra_bottom_padding != 0 ||
+        extra_left_padding != 0 || extra_right_padding != 0) {
      Tensor transformed_input;
-      const int new_conv_input_rows = conv_input_rows + padding_rows_parity;
-      const int new_conv_input_cols = conv_input_cols + padding_cols_parity;
+      const int new_conv_input_rows =
+          conv_input_rows + extra_top_padding + extra_bottom_padding;
+      const int new_conv_input_cols =
+          conv_input_cols + extra_left_padding + extra_right_padding;
      using VectT = typename Int8x4ToInt32<typename RawType<T>::type>::type;
      auto pad_data_format = is_int8x4 ? FORMAT_NCHW : data_format;
@@ -361,8 +393,9 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
          maybe_padded_conv_input.reinterpret_last_dimension<VectT, 4>());
      functor::PadInput<GPUDevice, VectT, int, 4>()(
-          ctx->eigen_device<GPUDevice>(), conv_input_eigen_tensor, {{0, 0}},
-          {{padding_rows_parity, padding_cols_parity}},
+          ctx->eigen_device<GPUDevice>(), conv_input_eigen_tensor,
+          {{extra_top_padding, extra_left_padding}},
+          {{extra_bottom_padding, extra_right_padding}},
          padded_conv_input_eigen_tensor, pad_data_format);
      conv_input = &maybe_padded_conv_input;
@@ -439,6 +472,8 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
      .set_feature_map_count(output_depth)
      .set_layout(data_layout);
  dnn::ConvolutionDescriptor conv_desc;
+  CHECK_EQ(0, padding_rows % 2);
+  CHECK_EQ(0, padding_cols % 2);
  conv_desc.set_vertical_filter_stride(row_stride)
      .set_horizontal_filter_stride(col_stride)
      .set_zero_padding_height(padding_rows / 2)
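For concreteness, here is a hypothetical walk-through of the first test case added to the Python test below (8x8 int8x4 input, 6x6 filter, stride 2, SAME padding), using the SAME-padding formula from launch() above. With a 6x6 filter the pre-cuDNN-7 workaround kicks in, so all of the padding moves into the manual pre-pad; the variable names here are illustrative only.

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int input = 8, filter = 6, stride = 2;
      // SAME padding: output size is ceil(input / stride).
      const int output = (input + stride - 1) / stride;  // 4
      const int padding =
          std::max(0, (output - 1) * stride + filter - input);  // 4
      // Workaround path: cuDNN sees zero padding; PadInput grows the conv
      // input by padding/2 = 2 rows/cols on each side, from 8x8 to 12x12.
      printf("padding=%d, pre-padded input=%dx%d\n", padding, input + padding,
             input + padding);
      return 0;
    }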
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index 2a18f3eeec..bb155aa249 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -659,6 +659,36 @@ def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
class FusedConvInt8Tests(test.TestCase):
  _test_params = [
      {
+          "batch_size": 1,
+          "input_channels": 4,
+          "output_channels": 4,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 6,
+          "filter_width": 6,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 1,
+          "input_channels": 4,
+          "output_channels": 4,
+          "input_height": 6,
+          "input_width": 6,
+          "filter_height": 6,
+          "filter_width": 6,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
          "batch_size": 2,
          "input_channels": 8,
          "output_channels": 16,