about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow
diff options
context:
space:
mode:
author: Smit Hinsu <hinsu@google.com> 2018-07-24 13:16:31 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-07-24 13:20:04 -0700
commit: e2f8d4a8bdfc4e3970cacc89a6b184297205a1cc (patch)
tree: f89cb78922b72a72dd130b6b410c5969bb3247a3 /tensorflow
parent: bb384118db531a7951735dcdc809b5735bc02a76 (diff)
Add data format as a parameter in ConvParameters to support NHWC format
PiperOrigin-RevId: 205879506
Diffstat (limited to 'tensorflow')
-rw-r--r-- tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc 3
-rw-r--r-- tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h 12
-rw-r--r-- tensorflow/core/kernels/conv_grad_filter_ops.cc 1
-rw-r--r-- tensorflow/core/kernels/conv_grad_input_ops.cc 1
-rw-r--r-- tensorflow/core/kernels/conv_grad_ops_3d.cc 2
-rw-r--r-- tensorflow/core/kernels/conv_ops.cc 1
-rw-r--r-- tensorflow/core/kernels/conv_ops_3d.cc 1
-rw-r--r-- tensorflow/core/kernels/conv_ops_gpu.h 20
-rw-r--r-- tensorflow/core/kernels/conv_ops_test.cc 62
9 files changed, 60 insertions, 43 deletions
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index 4554a3d89a..0ccb4583ab 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -443,6 +443,8 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
: dnn::DataLayout::kBatchDepthYX;
constexpr auto filter_layout = is_int8x4 ? dnn::FilterLayout::kOutputInputYX4
: dnn::FilterLayout::kOutputInputYX;
+ constexpr auto compute_data_format =
+ is_int8x4 ? FORMAT_NCHW_VECT_C : FORMAT_NCHW;
dnn::BatchDescriptor conv_input_desc;
conv_input_desc.set_count(batch_size)
@@ -529,6 +531,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
batch_size,
conv_input_depth,
{{conv_input_rows, conv_input_cols}},
+ compute_data_format,
output_depth,
{{filter_rows, filter_cols}},
// TODO(yangzihao): Add support for arbitrary dilations for fused conv.
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
index ba52697679..b9c131a2e9 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
@@ -29,13 +29,13 @@ namespace tensorflow {
class FusedConvParameters : public ConvParameters {
public:
FusedConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
- int64 out_depths, const SpatialArray& filter,
- const SpatialArray& dilation, const SpatialArray& stride,
- const SpatialArray& padding, DataType dtype,
- int device_id, bool has_side_input,
+ TensorFormat data_format, int64 out_depths,
+ const SpatialArray& filter, const SpatialArray& dilation,
+ const SpatialArray& stride, const SpatialArray& padding,
+ DataType dtype, int device_id, bool has_side_input,
ActivationMode activation_mode)
- : ConvParameters(batch, in_depths, in, out_depths, filter, dilation,
- stride, padding, dtype, device_id),
+ : ConvParameters(batch, in_depths, in, data_format, out_depths, filter,
+ dilation, stride, padding, dtype, device_id),
activation_mode_(activation_mode),
has_side_input_(has_side_input) {
hash_code_ = Hash64Combine(hash_code_, has_side_input);
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index aca75176a5..63b1bcda43 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -909,6 +909,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
dims.in_depth, // in_depths
{{input_desc.height(), // in_rows
input_desc.width()}}, // in_cols
+ FORMAT_NCHW, // compute_data_format
dims.out_depth, // out_depths
{{dims.spatial_dims[0].filter_size, // filter_rows
dims.spatial_dims[1].filter_size, // filter_cols
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index 63a775afa8..d664a11e73 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -957,6 +957,7 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
dims.in_depth, // in_depths
{{input_desc.height(), // in_rows
input_desc.width()}}, // in_cols
+ FORMAT_NCHW, // compute_data_format
dims.out_depth, // out_depths
{{dims.spatial_dims[0].filter_size, // filter_rows
dims.spatial_dims[1].filter_size, // filter_cols
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index 980b1063de..15f1bf9aba 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -716,6 +716,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
batch,
in_depth,
{{input_size[0], input_size[1], input_size[2]}},
+ FORMAT_NCHW,
out_depth,
{{filter_size[0], filter_size[1], filter_size[2]}},
{{dilations[0], dilations[1], dilations[2]}},
@@ -1112,6 +1113,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
batch,
in_depth,
{{input_size[0], input_size[1], input_size[2]}},
+ FORMAT_NCHW,
out_depth,
{{filter_size[0], filter_size[1], filter_size[2]}},
{{dilations[0], dilations[1], dilations[2]}},
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index 3b9886eece..ef692418d6 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -713,6 +713,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
in_depths, // in_depths
{{in_rows, // in_rows
in_cols}}, // in_cols
+ FORMAT_NCHW, // compute_data_format
out_depths, // out_depths
{{patch_rows, // filter_rows
patch_cols, // filter_cols
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 9ec16be67d..a1eed4e68c 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -415,6 +415,7 @@ struct LaunchConvOp<GPUDevice, T> {
in_batch,
in_depth,
{{in_planes, in_rows, in_cols}},
+ FORMAT_NCHW,
out_depth,
{{filter_planes, filter_rows, filter_cols}},
{{dilations[0], dilations[1], dilations[2]}},
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index d2c8020bb6..afc611f277 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -85,13 +85,15 @@ class ConvParameters {
public:
using SpatialArray = gtl::InlinedVector<int64, 3>;
ConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
- int64 out_depths, const SpatialArray& filter,
- const SpatialArray& dilation, const SpatialArray& stride,
- const SpatialArray& padding, DataType dtype, int device_id)
+ TensorFormat data_format, int64 out_depths,
+ const SpatialArray& filter, const SpatialArray& dilation,
+ const SpatialArray& stride, const SpatialArray& padding,
+ DataType dtype, int device_id)
: batch_(batch),
in_depths_(in_depths),
out_depths_(out_depths),
in_(in),
+ data_format_(data_format),
filter_(filter),
dilation_(dilation),
stride_(stride),
@@ -101,6 +103,7 @@ class ConvParameters {
hash_code_ = batch;
hash_code_ = Hash64Combine(hash_code_, in_depths);
for (int64 val : in) hash_code_ = Hash64Combine(hash_code_, val);
+ hash_code_ = Hash64Combine(hash_code_, data_format);
hash_code_ = Hash64Combine(hash_code_, out_depths);
for (int64 val : filter) hash_code_ = Hash64Combine(hash_code_, val);
for (int64 val : dilation) hash_code_ = Hash64Combine(hash_code_, val);
@@ -123,6 +126,7 @@ class ConvParameters {
return strings::StrCat(
batch_, ", ", in_depths_, ", ",
"(", str_util::Join(in_, ", "), "), ",
+ ::tensorflow::ToString(data_format_), ", ",
out_depths_, ", ",
"(", str_util::Join(filter_, ", "), "), ",
"(", str_util::Join(dilation_, ", "), "), ",
@@ -148,12 +152,13 @@ class ConvParameters {
protected:
using ParameterDataType =
- std::tuple<int64, int64, SpatialArray, int64, SpatialArray, SpatialArray,
- SpatialArray, SpatialArray, DataType, int>;
+ std::tuple<int64, int64, SpatialArray, TensorFormat, int64, SpatialArray,
+ SpatialArray, SpatialArray, SpatialArray, DataType, int>;
ParameterDataType get_data_as_tuple() const {
- return std::make_tuple(batch_, in_depths_, in_, out_depths_, filter_,
- dilation_, stride_, padding_, dtype_, device_id_);
+ return std::make_tuple(batch_, in_depths_, in_, data_format_, out_depths_,
+ filter_, dilation_, stride_, padding_, dtype_,
+ device_id_);
}
uint64 hash_code_;
@@ -178,6 +183,7 @@ class ConvParameters {
int64 in_depths_;
int64 out_depths_;
SpatialArray in_;
+ TensorFormat data_format_;
SpatialArray filter_;
SpatialArray dilation_;
SpatialArray stride_;
diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc
index 4f9a96ce17..c281153795 100644
--- a/tensorflow/core/kernels/conv_ops_test.cc
+++ b/tensorflow/core/kernels/conv_ops_test.cc
@@ -44,41 +44,43 @@ struct ConvParametersPeer {
TEST(ConvParameters, WinogradNonfusedAlgoSize) {
ConvParametersPeer conv_params_small = {{
- 1, // batch
- 32, // in_depths
- {{300, // in_rows
- 300}}, // in_cols
- 128, // out_depths
- {{3, // filter_rows
- 3}}, // filter_cols
- {{1, // dilation_rows
- 1}}, // dilation_cols
- {{1, // stride_rows
- 1}}, // stride_cols
- {{0, // padding_rows
- 0}}, // padding_cols
- DT_FLOAT, // tensor datatype
- 0, // device_id
+ 1, // batch
+ 32, // in_depths
+ {{300, // in_rows
+ 300}}, // in_cols
+ FORMAT_NCHW, // compute_data_format
+ 128, // out_depths
+ {{3, // filter_rows
+ 3}}, // filter_cols
+ {{1, // dilation_rows
+ 1}}, // dilation_cols
+ {{1, // stride_rows
+ 1}}, // stride_cols
+ {{0, // padding_rows
+ 0}}, // padding_cols
+ DT_FLOAT, // tensor datatype
+ 0, // device_id
}};
EXPECT_TRUE(
conv_params_small.ShouldIncludeWinogradNonfusedAlgoPreCudnn7<float>());
ConvParametersPeer conv_params_large = {{
- 1, // batch
- 128, // in_depths
- {{300, // in_rows
- 300}}, // in_cols
- 768, // out_depths
- {{3, // filter_rows
- 3}}, // filter_cols
- {{1, // dilation_rows
- 1}}, // dilation_cols
- {{1, // stride_rows
- 1}}, // stride_cols
- {{0, // padding_rows
- 0}}, // padding_cols
- DT_FLOAT, // tensor datatype
- 0, // device_id
+ 1, // batch
+ 128, // in_depths
+ {{300, // in_rows
+ 300}}, // in_cols
+ FORMAT_NCHW, // compute_data_format
+ 768, // out_depths
+ {{3, // filter_rows
+ 3}}, // filter_cols
+ {{1, // dilation_rows
+ 1}}, // dilation_cols
+ {{1, // stride_rows
+ 1}}, // stride_cols
+ {{0, // padding_rows
+ 0}}, // padding_cols
+ DT_FLOAT, // tensor datatype
+ 0, // device_id
}};
EXPECT_FALSE(
conv_params_large.ShouldIncludeWinogradNonfusedAlgoPreCudnn7<float>());