diff options
author | 2018-07-24 13:16:31 -0700 | |
---|---|---|
committer | 2018-07-24 13:20:04 -0700 | |
commit | e2f8d4a8bdfc4e3970cacc89a6b184297205a1cc (patch) | |
tree | f89cb78922b72a72dd130b6b410c5969bb3247a3 /tensorflow | |
parent | bb384118db531a7951735dcdc809b5735bc02a76 (diff) |
Add data format as a parameter in ConvParameters to support NHWC format
PiperOrigin-RevId: 205879506
Diffstat (limited to 'tensorflow')
-rw-r--r-- | tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc | 3 | ||||
-rw-r--r-- | tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h | 12 | ||||
-rw-r--r-- | tensorflow/core/kernels/conv_grad_filter_ops.cc | 1 | ||||
-rw-r--r-- | tensorflow/core/kernels/conv_grad_input_ops.cc | 1 | ||||
-rw-r--r-- | tensorflow/core/kernels/conv_grad_ops_3d.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/kernels/conv_ops.cc | 1 | ||||
-rw-r--r-- | tensorflow/core/kernels/conv_ops_3d.cc | 1 | ||||
-rw-r--r-- | tensorflow/core/kernels/conv_ops_gpu.h | 20 | ||||
-rw-r--r-- | tensorflow/core/kernels/conv_ops_test.cc | 62 |
9 files changed, 60 insertions(+), 43 deletions(-)
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 4554a3d89a..0ccb4583ab 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -443,6 +443,8 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>:: : dnn::DataLayout::kBatchDepthYX; constexpr auto filter_layout = is_int8x4 ? dnn::FilterLayout::kOutputInputYX4 : dnn::FilterLayout::kOutputInputYX; + constexpr auto compute_data_format = + is_int8x4 ? FORMAT_NCHW_VECT_C : FORMAT_NCHW; dnn::BatchDescriptor conv_input_desc; conv_input_desc.set_count(batch_size) @@ -529,6 +531,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>:: batch_size, conv_input_depth, {{conv_input_rows, conv_input_cols}}, + compute_data_format, output_depth, {{filter_rows, filter_cols}}, // TODO(yangzihao): Add support for arbitrary dilations for fused conv. 
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h index ba52697679..b9c131a2e9 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h @@ -29,13 +29,13 @@ namespace tensorflow { class FusedConvParameters : public ConvParameters { public: FusedConvParameters(int64 batch, int64 in_depths, const SpatialArray& in, - int64 out_depths, const SpatialArray& filter, - const SpatialArray& dilation, const SpatialArray& stride, - const SpatialArray& padding, DataType dtype, - int device_id, bool has_side_input, + TensorFormat data_format, int64 out_depths, + const SpatialArray& filter, const SpatialArray& dilation, + const SpatialArray& stride, const SpatialArray& padding, + DataType dtype, int device_id, bool has_side_input, ActivationMode activation_mode) - : ConvParameters(batch, in_depths, in, out_depths, filter, dilation, - stride, padding, dtype, device_id), + : ConvParameters(batch, in_depths, in, data_format, out_depths, filter, + dilation, stride, padding, dtype, device_id), activation_mode_(activation_mode), has_side_input_(has_side_input) { hash_code_ = Hash64Combine(hash_code_, has_side_input); diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index aca75176a5..63b1bcda43 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -909,6 +909,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()( dims.in_depth, // in_depths {{input_desc.height(), // in_rows input_desc.width()}}, // in_cols + FORMAT_NCHW, // compute_data_format dims.out_depth, // out_depths {{dims.spatial_dims[0].filter_size, // filter_rows dims.spatial_dims[1].filter_size, // filter_cols diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 
63a775afa8..d664a11e73 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -957,6 +957,7 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()( dims.in_depth, // in_depths {{input_desc.height(), // in_rows input_desc.width()}}, // in_cols + FORMAT_NCHW, // compute_data_format dims.out_depth, // out_depths {{dims.spatial_dims[0].filter_size, // filter_rows dims.spatial_dims[1].filter_size, // filter_cols diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 980b1063de..15f1bf9aba 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -716,6 +716,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel { batch, in_depth, {{input_size[0], input_size[1], input_size[2]}}, + FORMAT_NCHW, out_depth, {{filter_size[0], filter_size[1], filter_size[2]}}, {{dilations[0], dilations[1], dilations[2]}}, @@ -1112,6 +1113,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel { batch, in_depth, {{input_size[0], input_size[1], input_size[2]}}, + FORMAT_NCHW, out_depth, {{filter_size[0], filter_size[1], filter_size[2]}}, {{dilations[0], dilations[1], dilations[2]}}, diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 3b9886eece..ef692418d6 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -713,6 +713,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()( in_depths, // in_depths {{in_rows, // in_rows in_cols}}, // in_cols + FORMAT_NCHW, // compute_data_format out_depths, // out_depths {{patch_rows, // filter_rows patch_cols, // filter_cols diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 9ec16be67d..a1eed4e68c 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -415,6 +415,7 @@ struct LaunchConvOp<GPUDevice, T> 
{ in_batch, in_depth, {{in_planes, in_rows, in_cols}}, + FORMAT_NCHW, out_depth, {{filter_planes, filter_rows, filter_cols}}, {{dilations[0], dilations[1], dilations[2]}}, diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index d2c8020bb6..afc611f277 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -85,13 +85,15 @@ class ConvParameters { public: using SpatialArray = gtl::InlinedVector<int64, 3>; ConvParameters(int64 batch, int64 in_depths, const SpatialArray& in, - int64 out_depths, const SpatialArray& filter, - const SpatialArray& dilation, const SpatialArray& stride, - const SpatialArray& padding, DataType dtype, int device_id) + TensorFormat data_format, int64 out_depths, + const SpatialArray& filter, const SpatialArray& dilation, + const SpatialArray& stride, const SpatialArray& padding, + DataType dtype, int device_id) : batch_(batch), in_depths_(in_depths), out_depths_(out_depths), in_(in), + data_format_(data_format), filter_(filter), dilation_(dilation), stride_(stride), @@ -101,6 +103,7 @@ class ConvParameters { hash_code_ = batch; hash_code_ = Hash64Combine(hash_code_, in_depths); for (int64 val : in) hash_code_ = Hash64Combine(hash_code_, val); + hash_code_ = Hash64Combine(hash_code_, data_format); hash_code_ = Hash64Combine(hash_code_, out_depths); for (int64 val : filter) hash_code_ = Hash64Combine(hash_code_, val); for (int64 val : dilation) hash_code_ = Hash64Combine(hash_code_, val); @@ -123,6 +126,7 @@ class ConvParameters { return strings::StrCat( batch_, ", ", in_depths_, ", ", "(", str_util::Join(in_, ", "), "), ", + ::tensorflow::ToString(data_format_), ", ", out_depths_, ", ", "(", str_util::Join(filter_, ", "), "), ", "(", str_util::Join(dilation_, ", "), "), ", @@ -148,12 +152,13 @@ class ConvParameters { protected: using ParameterDataType = - std::tuple<int64, int64, SpatialArray, int64, SpatialArray, SpatialArray, - SpatialArray, SpatialArray, 
DataType, int>; + std::tuple<int64, int64, SpatialArray, TensorFormat, int64, SpatialArray, + SpatialArray, SpatialArray, SpatialArray, DataType, int>; ParameterDataType get_data_as_tuple() const { - return std::make_tuple(batch_, in_depths_, in_, out_depths_, filter_, - dilation_, stride_, padding_, dtype_, device_id_); + return std::make_tuple(batch_, in_depths_, in_, data_format_, out_depths_, + filter_, dilation_, stride_, padding_, dtype_, + device_id_); } uint64 hash_code_; @@ -178,6 +183,7 @@ class ConvParameters { int64 in_depths_; int64 out_depths_; SpatialArray in_; + TensorFormat data_format_; SpatialArray filter_; SpatialArray dilation_; SpatialArray stride_; diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index 4f9a96ce17..c281153795 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -44,41 +44,43 @@ struct ConvParametersPeer { TEST(ConvParameters, WinogradNonfusedAlgoSize) { ConvParametersPeer conv_params_small = {{ - 1, // batch - 32, // in_depths - {{300, // in_rows - 300}}, // in_cols - 128, // out_depths - {{3, // filter_rows - 3}}, // filter_cols - {{1, // dilation_rows - 1}}, // dilation_cols - {{1, // stride_rows - 1}}, // stride_cols - {{0, // padding_rows - 0}}, // padding_cols - DT_FLOAT, // tensor datatype - 0, // device_id + 1, // batch + 32, // in_depths + {{300, // in_rows + 300}}, // in_cols + FORMAT_NCHW, // compute_data_format + 128, // out_depths + {{3, // filter_rows + 3}}, // filter_cols + {{1, // dilation_rows + 1}}, // dilation_cols + {{1, // stride_rows + 1}}, // stride_cols + {{0, // padding_rows + 0}}, // padding_cols + DT_FLOAT, // tensor datatype + 0, // device_id }}; EXPECT_TRUE( conv_params_small.ShouldIncludeWinogradNonfusedAlgoPreCudnn7<float>()); ConvParametersPeer conv_params_large = {{ - 1, // batch - 128, // in_depths - {{300, // in_rows - 300}}, // in_cols - 768, // out_depths - {{3, // filter_rows - 3}}, // 
filter_cols - {{1, // dilation_rows - 1}}, // dilation_cols - {{1, // stride_rows - 1}}, // stride_cols - {{0, // padding_rows - 0}}, // padding_cols - DT_FLOAT, // tensor datatype - 0, // device_id + 1, // batch + 128, // in_depths + {{300, // in_rows + 300}}, // in_cols + FORMAT_NCHW, // compute_data_format + 768, // out_depths + {{3, // filter_rows + 3}}, // filter_cols + {{1, // dilation_rows + 1}}, // dilation_cols + {{1, // stride_rows + 1}}, // stride_cols + {{0, // padding_rows + 0}}, // padding_cols + DT_FLOAT, // tensor datatype + 0, // device_id }}; EXPECT_FALSE( conv_params_large.ShouldIncludeWinogradNonfusedAlgoPreCudnn7<float>()); |