path: root/tensorflow/core/kernels/conv_grad_ops_3d.cc
author    Eugene Zhulenev <ezhulenev@google.com> 2018-09-12 14:17:17 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2018-09-12 14:21:00 -0700
commit    c2b3222ac552e9698968c9a212095dbc8b9ca40b (patch)
tree      3c516598ec6c343116d06da94832fb68c73846a7 /tensorflow/core/kernels/conv_grad_ops_3d.cc
parent    31a55ce8927d2659dd20cc540a9cde8dcae3c036 (diff)
Use Eigen::CuboidConvolutionBackwardKernel in Conv3DBackpropFilter.
Instead of composing multiple primitive Eigen ops in Conv3DBackpropFilter, call directly into the Eigen function. This gives a modest ~10-25% latency improvement and ~10-20% peak memory reduction.

PiperOrigin-RevId: 212701797
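For context, the single functor call introduced below computes the filter gradient of the 3D (cuboid) convolution that the removed shuffle/pad/convolve/shuffle pipeline assembled by hand. The loop-based reference below is a minimal sketch of that computation for VALID padding; the function name, signature, and use of Eigen::Tensor here are illustrative assumptions, not TensorFlow's or Eigen's implementation.

// Reference sketch only: what the backward-filter pass computes, for VALID
// padding and NDHWC layouts matching the kernel below.
//   input:        [batch, in_planes, in_rows, in_cols, in_depth]
//   out_backprop: [batch, out_planes, out_rows, out_cols, out_depth]
//   result:       [filter_planes, filter_rows, filter_cols, in_depth, out_depth]
#include <unsupported/Eigen/CXX11/Tensor>

template <typename T>
Eigen::Tensor<T, 5, Eigen::RowMajor> CuboidFilterGradReference(
    const Eigen::Tensor<T, 5, Eigen::RowMajor>& input,
    const Eigen::Tensor<T, 5, Eigen::RowMajor>& out_backprop,
    Eigen::Index filter_planes, Eigen::Index filter_rows,
    Eigen::Index filter_cols, Eigen::Index stride_planes,
    Eigen::Index stride_rows, Eigen::Index stride_cols) {
  const Eigen::Index batch = input.dimension(0);
  const Eigen::Index in_depth = input.dimension(4);
  const Eigen::Index out_planes = out_backprop.dimension(1);
  const Eigen::Index out_rows = out_backprop.dimension(2);
  const Eigen::Index out_cols = out_backprop.dimension(3);
  const Eigen::Index out_depth = out_backprop.dimension(4);

  Eigen::Tensor<T, 5, Eigen::RowMajor> filter_grad(
      filter_planes, filter_rows, filter_cols, in_depth, out_depth);
  filter_grad.setZero();

  // Gradient of the forward cuboid convolution w.r.t. the filter: each output
  // gradient element is multiplied with the input patch it was produced from,
  // and the products are accumulated per filter coefficient.
  for (Eigen::Index b = 0; b < batch; ++b) {
    for (Eigen::Index p = 0; p < out_planes; ++p) {
      for (Eigen::Index r = 0; r < out_rows; ++r) {
        for (Eigen::Index c = 0; c < out_cols; ++c) {
          for (Eigen::Index fp = 0; fp < filter_planes; ++fp) {
            for (Eigen::Index fr = 0; fr < filter_rows; ++fr) {
              for (Eigen::Index fc = 0; fc < filter_cols; ++fc) {
                for (Eigen::Index id = 0; id < in_depth; ++id) {
                  for (Eigen::Index od = 0; od < out_depth; ++od) {
                    filter_grad(fp, fr, fc, id, od) +=
                        input(b, p * stride_planes + fp, r * stride_rows + fr,
                              c * stride_cols + fc, id) *
                        out_backprop(b, p, r, c, od);
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  return filter_grad;
}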
Diffstat (limited to 'tensorflow/core/kernels/conv_grad_ops_3d.cc')
-rw-r--r-- tensorflow/core/kernels/conv_grad_ops_3d.cc | 76
1 file changed, 12 insertions, 64 deletions
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index ec7c02ac2b..78e8375062 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -322,70 +322,18 @@ class Conv3DBackpropFilterOp : public OpKernel {
return;
}
- // For the backprop of the filter, we need to also transpose the
- // out_backprop.
- // The shape of backprop is
- // [batch, out_z, out_y, out_x, out_depth]
- // And we need to change it to
- // [out_depth, out_x, out_y, out_z, batch]
- Eigen::DSizes<Eigen::DenseIndex, 5> out_order{4, 1, 2, 3, 0};
- TensorShape padded_out_shape({out_depth, padded_out_planes, padded_out_rows,
- padded_out_cols, batch});
- Tensor padded_output;
- OP_REQUIRES_OK(context,
- context->allocate_temp(DataTypeToEnum<T>::v(),
- padded_out_shape, &padded_output));
- Eigen::DSizes<Eigen::DenseIndex, 5> eigen_strides{1, strides[0], strides[1],
- strides[2], 1};
- functor::InflatePadAndShuffle<Device, T, 5, Eigen::DenseIndex>()(
- context->eigen_device<Device>(), out_backprop.tensor<T, 5>(),
- eigen_strides, pad_dims, out_order, padded_output.tensor<T, 5>());
- const Tensor& padded_output_cref = padded_output;
-
- // For the backprop of the filter, we need to transpose the input.
- // The shape of input is
- // [batch, in_z, in_y, in_x, in_depth]
- // And we need to change it to
- // [in_z, in_y, in_x, batch, in_depth]
- Eigen::DSizes<Eigen::DenseIndex, 5> in_order{1, 2, 3, 0, 4};
- TensorShape in_shuffle_shape(
- {input_size[0], input_size[1], input_size[2], batch, in_depth});
- Tensor in_shuffle;
- OP_REQUIRES_OK(context,
- context->allocate_temp(DataTypeToEnum<T>::v(),
- in_shuffle_shape, &in_shuffle));
- // No need for reversing this time.
- Eigen::array<bool, 5> no_reverse{false, false, false, false, false};
- functor::ShuffleAndReverse<Device, T, 5, Eigen::DenseIndex>()(
- context->eigen_device<Device>(), input.tensor<T, 5>(), in_order,
- no_reverse, in_shuffle.tensor<T, 5>());
- const Tensor& in_shuffle_cref = in_shuffle;
-
- // The output of the conv_3d would be
- // [out_depth, filter_size[2], filter_size[1], filter_size[0], in_depth]
- // and we need to shuffle it back to
- // [filter_size[2], filter_size[1], filter_size[0], in_depth, out_depth];
- // And we need to reverse the filter backprops.
- // So we need to allocate (sigh) yet another piece of memory to hold the
- // output.
- TensorShape filter_shuffle_shape(
- {out_depth, filter_size[0], filter_size[1], filter_size[2], in_depth});
- Tensor filter_shuffle;
- OP_REQUIRES_OK(
- context, context->allocate_temp(DataTypeToEnum<T>::v(),
- filter_shuffle_shape, &filter_shuffle));
- functor::CuboidConvolution<Device, T>()(
- context->eigen_device<Device>(), filter_shuffle.tensor<T, 5>(),
- padded_output_cref.tensor<T, 5>(), in_shuffle_cref.tensor<T, 5>(), 1, 1,
- 1, BrainPadding2EigenPadding(VALID));
-
- // Now copy the filter_backprop back to the destination.
- Eigen::DSizes<Eigen::DenseIndex, 5> filter_order{1, 2, 3, 4, 0};
- Eigen::array<bool, 5> filter_rev_dims{true, true, true, false, false};
- const Tensor& filter_shuffle_cref = filter_shuffle;
- functor::ShuffleAndReverse<Device, T, 5, Eigen::DenseIndex>()(
- context->eigen_device<Device>(), filter_shuffle_cref.tensor<T, 5>(),
- filter_order, filter_rev_dims, filter_backprop->tensor<T, 5>());
+ // There is no need to explicitly compute padding values (and pad
+ // out_backprop), because Eigen uses the same padding inference mechanism as
+ // Tensorflow.
+ functor::CuboidConvolutionBackwardFilter<Device, T>()(
+ context->eigen_device<Device>(),
+ filter_backprop->tensor<T, 5>(), // filter_backward
+ input.tensor<T, 5>(), // input
+ out_backprop.tensor<T, 5>(), // output_backward
+ // Order of strides will be reversed before passing to Eigen.
+ static_cast<int>(strides[0]), // stride_planes
+ static_cast<int>(strides[1]), // stride_rows
+ static_cast<int>(strides[2])); // stride_cols
}
private:
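As the new comment in the hunk notes, the kernel no longer has to compute padding values and materialize a padded copy of out_backprop, because the Eigen backward-filter kernel infers the implicit padding from the same quantities the forward op uses. The snippet below is a minimal sketch of that SAME-padding arithmetic along one spatial dimension (the struct and helper names are hypothetical, used only for illustration):

#include <algorithm>

// Hypothetical names for illustration only.
struct SamePadding {
  int out_size;    // output extent along one spatial dimension
  int pad_before;  // implicit zeros added before the data
  int pad_after;   // implicit zeros added after the data
};

inline SamePadding InferSamePadding(int in_size, int filter_size, int stride) {
  // SAME padding: out = ceil(in / stride), then pad just enough that every
  // output position sees a full filter window over the (virtually) padded input.
  const int out_size = (in_size + stride - 1) / stride;
  const int pad_total =
      std::max(0, (out_size - 1) * stride + filter_size - in_size);
  return {out_size, pad_total / 2, pad_total - pad_total / 2};
}

Because both sides derive pad_before/pad_after from the same (in_size, filter_size, stride) triple, the padded, transposed temporaries allocated by the removed code are unnecessary, which is consistent with the peak-memory reduction cited in the commit message.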