aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Eugene Zhulenev <ezhulenev@google.com> 2018-09-04 13:52:01 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-09-04 13:56:35 -0700
commit: ffd9519c3fffe43473f06a1c8fdd12519490db3b (patch)
tree: 065aac9a95e45023dff9de2cf53bae9110c19370
parent: 0cd9b3e41d993f505feb54ff0b086ffbb21b595d (diff)
Optimize CuboidConvolutionBackwardKernel (Conv3D kernel backprop).

* Simplify the contraction by collapsing the inner dims into a single dimension.
* Get rid of the expensive reverse op.

~5X improvement when compiled with AVX.

PiperOrigin-RevId: 211518363
-rw-r--r-- tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h 304
1 file changed, 96 insertions, 208 deletions
diff --git a/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h b/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h
index e13e548f86..3ebeb7be2b 100644
--- a/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h
+++ b/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h
@@ -323,47 +323,34 @@ CuboidConvolutionBackwardInput(
template <typename OutputBackward, typename Input>
EIGEN_ALWAYS_INLINE static const typename internal::conditional<
internal::traits<OutputBackward>::Layout == ColMajor,
- const TensorShufflingOp<
- const array<typename internal::traits<OutputBackward>::Index, 5>,
- const TensorReverseOp<
- const array<bool, 5>,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, 5>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
const TensorReshapingOp<
- const DSizes<typename internal::traits<OutputBackward>::Index,
- 5>,
- const TensorContractionOp<
- const array<
- IndexPair<typename internal::traits<Input>::Index>, 2>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- 3>,
- const Input>,
- const TensorReshapingOp<
- const DSizes<
- typename internal::traits<OutputBackward>::Index,
- 4>,
- const TensorVolumePatchOp<
- Dynamic, Dynamic, Dynamic,
- const OutputBackward> > > > > >,
- const TensorShufflingOp<
- const array<typename internal::traits<OutputBackward>::Index, 5>,
- const TensorReverseOp<
- const array<bool, 5>,
+ const DSizes<typename internal::traits<Input>::Index, 2>,
+ const OutputBackward>,
+ const TensorShufflingOp<
+ const array<typename internal::traits<OutputBackward>::Index,
+ 2>,
+ const TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
+ const Input> > > > >,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, 5>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+ const TensorShufflingOp<
+ const array<typename internal::traits<OutputBackward>::Index,
+ 2>,
+ const TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
+ const Input> > >,
const TensorReshapingOp<
- const DSizes<typename internal::traits<OutputBackward>::Index,
- 5>,
- const TensorContractionOp<
- const array<
- IndexPair<typename internal::traits<Input>::Index>, 2>,
- const TensorReshapingOp<
- const DSizes<
- typename internal::traits<OutputBackward>::Index,
- 4>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
- const OutputBackward> >,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- 3>,
- const Input> > > > > >::type
+ const DSizes<typename internal::traits<Input>::Index, 2>,
+ const OutputBackward> > > >::type
CuboidConvolutionBackwardKernel(
const Input& input, const OutputBackward& output_backward,
typename internal::traits<Input>::Index kernelPlanes,
@@ -406,213 +393,114 @@ CuboidConvolutionBackwardKernel(
const TensorIndex outputCols =
isColMajor ? out.dimension(3) : out.dimension(NumDims - 4);
+ // Number of filters. This is the same as the output depth.
const TensorIndex kernelFilters =
isColMajor ? out.dimension(0) : out.dimension(NumDims - 1);
+ // Number of channels. This is the same as the input depth.
const TensorIndex kernelChannels =
isColMajor ? in.dimension(0) : in.dimension(NumDims - 1);
- TensorIndex forward_pad_z, forward_pad_y, forward_pad_x;
- const TensorIndex size_z =
- Eigen::divup(inputPlanes, static_cast<TensorIndex>(stridePlanes));
- const TensorIndex size_y =
- Eigen::divup(inputRows, static_cast<TensorIndex>(strideRows));
- const TensorIndex size_x =
- Eigen::divup(inputCols, static_cast<TensorIndex>(strideCols));
-
- // Infer padding type.
- if (size_z == outputPlanes && size_y == outputRows && size_x == outputCols) {
- // SAME padding.
- const TensorIndex dz = numext::maxi<TensorIndex>(
- 0, (size_z - 1) * stridePlanes + kernelPlanes - inputPlanes);
- const TensorIndex dy = numext::maxi<TensorIndex>(
- 0, (size_y - 1) * strideRows + kernelRows - inputRows);
- const TensorIndex dx = numext::maxi<TensorIndex>(
- 0, (size_x - 1) * strideCols + kernelCols - inputCols);
-
- forward_pad_z = dz / 2;
- forward_pad_y = dy / 2;
- forward_pad_x = dx / 2;
- } else {
- // VALID padding.
- forward_pad_z = 0;
- forward_pad_y = 0;
- forward_pad_x = 0;
- }
-
- const TensorIndex padding_ztop = kernelPlanes - 1 - forward_pad_z;
- const TensorIndex padding_top = kernelRows - 1 - forward_pad_y;
- const TensorIndex padding_left = kernelCols - 1 - forward_pad_x;
-
- const TensorIndex padding_zbottom = inputPlanes + kernelPlanes - 1 -
- (outputPlanes - 1) * stridePlanes - 1 -
- padding_ztop;
- const TensorIndex padding_bottom = inputRows + kernelRows - 1 -
- (outputRows - 1) * strideRows - 1 -
- padding_top;
- const TensorIndex padding_right = inputCols + kernelCols - 1 -
- (outputCols - 1) * strideCols - 1 -
- padding_left;
-
- eigen_assert(padding_ztop >= 0);
- eigen_assert(padding_zbottom >= 0);
- eigen_assert(padding_top >= 0);
- eigen_assert(padding_left >= 0);
- eigen_assert(padding_bottom >= 0);
- eigen_assert(padding_right >= 0);
-
- // The output_backward has dimensions out_depth X out_plaens X out_rows X
- // out_cols X OTHERS
- // When we extract the image patches from output_backward (with input as the
- // kernel), it will have dimensions
- // (out_depth) X (input_planes * input_rows * input_cols) X (kernel_planes *
- // kernel_rows * kernel_cols) X OTHERS
- DSizes<TensorIndex, 4> pre_contract_dims;
+ // TODO(ezhulenev): Add support for inflated strides. Without inflated strides
+ // effective kernel planes/rows/cols are always the same as the kernel itself
+ // (see eigen_spatial_convolutions for details).
+ const TensorIndex kernelPlanesEff = kernelPlanes;
+ const TensorIndex kernelRowsEff = kernelRows;
+ const TensorIndex kernelColsEff = kernelCols;
+
+ const TensorIndex padPlanes = numext::maxi<Index>(
+ 0, (outputPlanes - 1) * stridePlanes + kernelPlanesEff - inputPlanes);
+ const TensorIndex padRows = numext::maxi<Index>(
+ 0, (outputRows - 1) * strideRows + kernelRowsEff - inputRows);
+ const TensorIndex padCols = numext::maxi<Index>(
+ 0, (outputCols - 1) * strideCols + kernelColsEff - inputCols);
+
+ const TensorIndex padding_top_z = padPlanes / 2;
+ const TensorIndex padding_bottom_z = padPlanes - padding_top_z;
+ const TensorIndex padding_top = padRows / 2;
+ const TensorIndex padding_bottom = padRows - padding_top;
+ const TensorIndex padding_left = padCols / 2;
+ const TensorIndex padding_right = padCols - padding_left;
+
+ // Reshaped output_backward before contraction.
+ DSizes<TensorIndex, 2> output_dims;
if (isColMajor) {
- pre_contract_dims[0] = kernelFilters;
- pre_contract_dims[1] = inputRows * inputCols * inputPlanes;
- pre_contract_dims[2] = kernelRows * kernelCols * kernelPlanes;
- pre_contract_dims[3] = 1;
+ output_dims[0] = kernelFilters;
+ output_dims[1] = outputPlanes * outputRows * outputCols;
for (int i = 4; i < NumDims; ++i) {
- pre_contract_dims[3] *= out.dimension(i);
+ output_dims[1] *= out.dimension(i);
}
} else {
- pre_contract_dims[3] = kernelFilters;
- pre_contract_dims[2] = inputRows * inputCols * inputPlanes;
- pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes;
- pre_contract_dims[0] = 1;
+ output_dims[1] = kernelFilters;
+ output_dims[0] = outputCols * outputRows * outputPlanes;
for (int i = 0; i < NumDims - 4; ++i) {
- pre_contract_dims[0] *= out.dimension(i);
+ output_dims[0] *= out.dimension(i);
}
}
- // The input has dimensions in_depth X (input_planes * input_rows *
- // input_cols) X OTHERS
- DSizes<TensorIndex, 3> input_dims;
+ // Reshaped extract_volume_patches(in)
+ DSizes<TensorIndex, 2> pre_contract_dims;
if (isColMajor) {
- input_dims[0] = kernelChannels;
- input_dims[1] = inputRows * inputCols * inputPlanes;
- input_dims[2] = 1;
+ pre_contract_dims[0] =
+ kernelChannels * kernelPlanes * kernelRows * kernelCols;
+ pre_contract_dims[1] = outputPlanes * outputRows * outputCols;
for (int i = 4; i < NumDims; ++i) {
- input_dims[2] *= in.dimension(i);
+ pre_contract_dims[1] *= in.dimension(i);
}
- eigen_assert(input_dims[2] == pre_contract_dims[3]);
+ eigen_assert(output_dims[1] == pre_contract_dims[1]);
} else {
- input_dims[2] = kernelChannels;
- input_dims[1] = inputRows * inputCols * inputPlanes;
- input_dims[0] = 1;
+ pre_contract_dims[1] =
+ kernelCols * kernelRows * kernelPlanes * kernelChannels;
+ pre_contract_dims[0] = outputCols * outputRows * outputPlanes;
for (int i = 0; i < NumDims - 4; ++i) {
- input_dims[0] *= in.dimension(i);
+ pre_contract_dims[0] *= in.dimension(i);
}
- eigen_assert(input_dims[0] == pre_contract_dims[0]);
+ eigen_assert(output_dims[0] == pre_contract_dims[0]);
}
- // We will contract along dimensions (1, 2) in and (1, 3) in out, if
- // this is col-major.
- // For row-major, it's dimensions (0, 1) in and (0, 2) in out.
- array<IndexPair<TensorIndex>, 2> contract_dims;
- if (isColMajor) {
- // col-major: in.contract(output.patches)
- contract_dims[0] = IndexPair<TensorIndex>(1, 1);
- contract_dims[1] = IndexPair<TensorIndex>(2, 3);
- } else {
- // row-major: output.patches.contract(in)
- contract_dims[0] = IndexPair<TensorIndex>(0, 0);
- contract_dims[1] = IndexPair<TensorIndex>(2, 1);
- }
+ array<TensorIndex, 2> shuffle_dims;
+ shuffle_dims[0] = 1;
+ shuffle_dims[1] = 0;
- // After the contraction, the kernel will have dimension
- // in_depth X out_depth X kernel_patches X kernel_rows X kernel_cols
- // We will need to shuffle the first two dimensions and reverse the spatial
- // dimensions.
- // The end shape is:
- // out_depth X in_shape X kernel_planes X kernel_rows X kernel_cols
+ array<IndexPair<TensorIndex>, 1> contract_dims;
+ contract_dims[0] = IndexPair<TensorIndex>(1, 0);
- // This is the shape of the kernel *before* the shuffling.
DSizes<TensorIndex, 5> kernel_dims;
if (isColMajor) {
- kernel_dims[0] = kernelChannels;
- kernel_dims[1] = kernelFilters;
+ kernel_dims[0] = kernelFilters;
+ kernel_dims[1] = kernelChannels;
kernel_dims[2] = kernelPlanes;
kernel_dims[3] = kernelRows;
kernel_dims[4] = kernelCols;
} else {
- kernel_dims[0] = kernelCols;
- kernel_dims[1] = kernelRows;
+ kernel_dims[4] = kernelFilters;
+ kernel_dims[3] = kernelChannels;
kernel_dims[2] = kernelPlanes;
- kernel_dims[3] = kernelFilters;
- kernel_dims[4] = kernelChannels;
- }
-
- // Flip filters and channels.
- array<TensorIndex, 5> kernel_shuffle;
- if (isColMajor) {
- kernel_shuffle[0] = 1;
- kernel_shuffle[1] = 0;
- kernel_shuffle[2] = 2;
- kernel_shuffle[3] = 3;
- kernel_shuffle[4] = 4;
- } else {
- kernel_shuffle[0] = 0;
- kernel_shuffle[1] = 1;
- kernel_shuffle[2] = 2;
- kernel_shuffle[3] = 4;
- kernel_shuffle[4] = 3;
- }
-
- // Reverse the spatial dimensions.
- array<bool, 5> kernel_reverse;
- if (isColMajor) {
- kernel_reverse[0] = false;
- kernel_reverse[1] = false;
- kernel_reverse[2] = true;
- kernel_reverse[3] = true;
- kernel_reverse[4] = true;
- } else {
- kernel_reverse[0] = true;
- kernel_reverse[1] = true;
- kernel_reverse[2] = true;
- kernel_reverse[3] = false;
- kernel_reverse[4] = false;
+ kernel_dims[1] = kernelRows;
+ kernel_dims[0] = kernelCols;
}
- DSizes<TensorIndex, NumDims> strides;
- for (int i = 0; i < NumDims; i++) {
- strides[i] = 1;
- }
- if (isColMajor) {
- strides[1] = stridePlanes;
- strides[2] = strideRows;
- strides[3] = strideCols;
- } else {
- strides[NumDims - 2] = stridePlanes;
- strides[NumDims - 3] = strideRows;
- strides[NumDims - 4] = strideCols;
- }
return choose(
Cond<internal::traits<Input>::Layout == ColMajor>(),
- input.reshape(input_dims)
- .contract(output_backward
+ output_backward.reshape(output_dims)
+ .contract(input
.extract_volume_patches(
- inputPlanes, inputRows, inputCols, 1, 1, 1,
- stridePlanes, strideRows, strideCols,
-
- padding_ztop, padding_zbottom, padding_top,
- padding_bottom, padding_left, padding_right)
- .reshape(pre_contract_dims),
+ kernelPlanes, kernelRows, kernelCols, stridePlanes,
+ strideRows, strideCols, 1, 1, 1, padding_top_z,
+ padding_bottom_z, padding_top, padding_bottom,
+ padding_left, padding_right)
+ .reshape(pre_contract_dims)
+ .shuffle(shuffle_dims),
contract_dims)
- .reshape(kernel_dims)
- .reverse(kernel_reverse)
- .shuffle(kernel_shuffle),
- output_backward
- .extract_volume_patches(inputPlanes, inputRows, inputCols, 1, 1, 1,
- stridePlanes, strideRows, strideCols,
- padding_ztop, padding_zbottom, padding_top,
+ .reshape(kernel_dims),
+ input
+ .extract_volume_patches(kernelPlanes, kernelRows, kernelCols,
+ stridePlanes, strideRows, strideCols, 1, 1, 1,
+ padding_top_z, padding_bottom_z, padding_top,
padding_bottom, padding_left, padding_right)
.reshape(pre_contract_dims)
- .contract(input.reshape(input_dims), contract_dims)
- .reshape(kernel_dims)
- .reverse(kernel_reverse)
- .shuffle(kernel_shuffle));
+ .shuffle(shuffle_dims)
+ .contract(output_backward.reshape(output_dims), contract_dims)
+ .reshape(kernel_dims));
}
} // end namespace Eigen