/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #ifndef TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_ #define TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace Eigen { /** SpatialConvolutionBackwardInput * \ingroup CXX11_NeuralNetworks_Module * * \brief Computes the backprop for the input of a 2D convolution. * * The output_backward parameter is expected to be a tensor with a rank of 3 or * more (channels, height, width, and optionally others) * The kernel parameter is expected to be a 4D tensor (filters, channels, * kernel_height, kernel_width) * The output_backward and the kernel must both be in col-major layout. The * result will also be in col-major layout. * * If row_in_stride, col_in_stride > 1, then applies convolution with holes * (aka atrous convolution), sampling every row_in_stride, col_in_stride input * pixels. * * The result can be assigned to a tensor of rank equal to the rank of the * output_backward. The dimensions of the result will be filters, height, width * (and others if applicable). * * It is possible to swap the order of the width and height dimensions provided * that the same order is used in the input, the kernel, and the output. * */ #ifdef EIGEN_HAS_INDEX_LIST typedef IndexList, type2index<0>, type2index<1>, type2index<1> > ReverseColMajor; typedef IndexList, type2index<1>, type2index<0>, type2index<0> > ReverseRowMajor; #else typedef array ReverseColMajor; typedef array ReverseRowMajor; #endif template EIGEN_ALWAYS_INLINE static const typename internal::conditional< internal::traits::Layout == ColMajor, TensorReshapingOp< const DSizes::Index, internal::traits::NumDimensions>, const TensorContractionOp< const array< IndexPair::Index>, 1>, const Eigen::TensorForcedEvalOp::Index, 2>, const TensorShufflingOp< const array< typename internal::traits::Index, 4>, const TensorReverseOp > > >, const TensorReshapingOp< const DSizes::Index, 2>, const TensorImagePatchOp > > >, TensorReshapingOp< const DSizes::Index, internal::traits::NumDimensions>, const TensorContractionOp< const array< IndexPair::Index>, 1>, const TensorReshapingOp< const DSizes::Index, 2>, const TensorImagePatchOp >, const Eigen::TensorForcedEvalOp::Index, 2>, const TensorShufflingOp< const array< typename internal::traits::Index, 4>, const TensorReverseOp > > > > > >::type SpatialConvolutionBackwardInput( const Kernel& kernel, const OutputBackward& output_backward, typename internal::traits::Index inputRows, typename internal::traits::Index inputCols, const DenseIndex row_stride = 1, const DenseIndex col_stride = 1, const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) { typedef typename internal::traits::Index TensorIndex; typedef typename internal::traits::Scalar OutScalar; TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > kern(kernel); TensorRef::NumDimensions, internal::traits::Layout, TensorIndex> > out(output_backward); EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); static const bool isColMajor = (internal::traits::Layout == ColMajor); static const int NumDims = internal::traits::NumDimensions; // Number of filters to apply. This is the same as the output depth of the // result const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; // Number of channels. This is the same as the input depth. const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1]; const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0]; // This is the effective kernel size, taking into account the (*_in_stride - // 1) zero-values // inserted between consecutive kernel elements in atrous convolution const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (row_in_stride - 1); const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (col_in_stride - 1); const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2); const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3); // Computing the forward padding const TensorIndex forward_pad_top = numext::maxi( 0, ((outputRows - 1) * row_stride + kernelRowsEff - inputRows) / 2); const TensorIndex forward_pad_left = numext::maxi( 0, ((outputCols - 1) * col_stride + kernelColsEff - inputCols) / 2); const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top; const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left; const TensorIndex padding_bottom = inputRows - (outputRows - 1) * row_stride - 2 - padding_top + kernelRowsEff; const TensorIndex padding_right = inputCols - (outputCols - 1) * col_stride - 2 - padding_left + kernelColsEff; eigen_assert(padding_top >= 0); eigen_assert(padding_left >= 0); eigen_assert(padding_bottom >= 0); eigen_assert(padding_right >= 0); // The kernel has dimensions filters X channels X patch_rows X patch_cols // We need to reverse the kernel along dimensions corresponding to rows and // cols. // TODO(yangke): we can make things slightly faster by collapsing the // dimensions // where we don't reverse. Try that once we have a faster compiler. typedef typename internal::conditional::type Reverse; Reverse kernel_reverse; #ifndef EIGEN_HAS_INDEX_LIST if (isColMajor) { kernel_reverse[0] = false; kernel_reverse[1] = false; kernel_reverse[2] = true; kernel_reverse[3] = true; } else { kernel_reverse[0] = true; kernel_reverse[1] = true; kernel_reverse[2] = false; kernel_reverse[3] = false; } #endif // Reorder the dimensions to: // filters x patch_rows x patch_cols x channels array kernel_shuffle; if (isColMajor) { // From: filters x channels x rows x cols // To: filters x rows x cols x channels kernel_shuffle[0] = 0; kernel_shuffle[1] = 2; kernel_shuffle[2] = 3; kernel_shuffle[3] = 1; } else { // From: cols x rows x channels x filters // To: channels x cols x rows x filters kernel_shuffle[0] = 2; kernel_shuffle[1] = 0; kernel_shuffle[2] = 1; kernel_shuffle[3] = 3; } // Collapse the dims DSizes kernel_dims; if (isColMajor) { kernel_dims[0] = kernelFilters * kernelRows * kernelCols; kernel_dims[1] = kernelChannels; } else { kernel_dims[1] = kernelFilters * kernelRows * kernelCols; kernel_dims[0] = kernelChannels; } // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS // When we extract the image patches from output_backward, it will have // dimensions // out_depth X (patch_rows * patch_cols) X (input_rows * input_cols * // OTHERS) DSizes pre_contract_dims; if (isColMajor) { pre_contract_dims[0] = kernelFilters * kernelRows * kernelCols; pre_contract_dims[1] = inputRows * inputCols; for (int i = 3; i < NumDims; ++i) { pre_contract_dims[1] *= out.dimension(i); } } else { pre_contract_dims[1] = kernelFilters * kernelRows * kernelCols; pre_contract_dims[0] = inputRows * inputCols; for (int i = 0; i < NumDims - 3; ++i) { pre_contract_dims[0] *= out.dimension(i); } } // We will contract along the collapsed dimension that contains the // kernelFilters, the kernelRows and the kernelCols. array, 1> contract_dims; if (isColMajor) { // col-major: kernel.contract(output.patches) contract_dims[0] = IndexPair(0, 0); } else { // row-major: output.patches.contract(kernel) contract_dims[0] = IndexPair(1, 1); } // Post contraction, the dimensions of the input_backprop is // channels X input_rows X input_cols X OTHERS DSizes post_contract_dims; if (isColMajor) { post_contract_dims[0] = kernelChannels; post_contract_dims[1] = inputRows; post_contract_dims[2] = inputCols; for (int i = 3; i < NumDims; ++i) { post_contract_dims[i] = out.dimension(i); } } else { post_contract_dims[NumDims - 1] = kernelChannels; post_contract_dims[NumDims - 2] = inputRows; post_contract_dims[NumDims - 3] = inputCols; for (int i = 0; i < NumDims - 3; ++i) { post_contract_dims[i] = out.dimension(i); } } return choose( Cond::Layout == ColMajor>(), kernel.reverse(kernel_reverse) .shuffle(kernel_shuffle) .reshape(kernel_dims) .eval() .contract( output_backward .extract_image_patches( kernelRows, kernelCols, 1, 1, row_in_stride, col_in_stride, row_stride, col_stride, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)) .reshape(pre_contract_dims), contract_dims) .reshape(post_contract_dims), output_backward .extract_image_patches(kernelRows, kernelCols, 1, 1, row_in_stride, col_in_stride, row_stride, col_stride, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)) .reshape(pre_contract_dims) .contract(kernel.reverse(kernel_reverse) .shuffle(kernel_shuffle) .reshape(kernel_dims) .eval(), contract_dims) .reshape(post_contract_dims)); } /** SpatialConvolutionBackwardKernel * \ingroup CXX11_NeuralNetworks_Module * * \brief Computes the backprop for the filter of a 2D convolution. * * The output_backward parameter is expected to be a tensor with a rank of 3 or * more (channels, height, width, and optionally others) * The kernel parameter is expected to be a 4D tensor (filters, channels, * kernel_height, kernel_width) * The output_backward and the kernel must both be in col-major layout. The * result will also be in col-major layout. * * If row_in_stride, col_stride > 1, then applies convolution with holes (aka * atrous convolution), sampling every row_in_stride, col_in_stride input * pixels. * * The result can be assigned to a tensor of rank equal to the rank of the * output_backward. The dimensions of the result will be filters, height, width * (and others if applicable). * * It is possible to swap the order of the width and height dimensions provided * that the same order is used in the input, the kernel, and the output. * */ template EIGEN_ALWAYS_INLINE static const typename internal::conditional< internal::traits::Layout == ColMajor, TensorReshapingOp< const DSizes::Index, 4>, const TensorContractionOp< const array::Index>, 1>, const TensorReshapingOp< const DSizes::Index, 2>, const OutputBackward>, const TensorReshapingOp< const DSizes::Index, 2>, const TensorImagePatchOp > > >, TensorReshapingOp< const DSizes::Index, 4>, const TensorContractionOp< const array::Index>, 1>, const TensorReshapingOp< const DSizes::Index, 2>, const TensorImagePatchOp >, const TensorReshapingOp< const DSizes::Index, 2>, const OutputBackward> > > >::type SpatialConvolutionBackwardKernel( const Input& input, const OutputBackward& output_backward, typename internal::traits::Index kernelRows, typename internal::traits::Index kernelCols, const DenseIndex row_stride = 1, const DenseIndex col_stride = 1, const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) { typedef typename internal::traits::Index TensorIndex; typedef typename internal::traits::Scalar OutScalar; TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); TensorRef::NumDimensions, internal::traits::Layout, TensorIndex> > out(output_backward); EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); // stride and in_stride cannot both be larger than 1 eigen_assert(!(row_stride > 1 && row_in_stride > 1) && !(col_stride > 1 && col_in_stride > 1)); static const bool isColMajor = (internal::traits::Layout == ColMajor); static const int NumDims = internal::traits::NumDimensions; EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == internal::traits::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE); const TensorIndex inputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); const TensorIndex inputCols = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2); const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3); // Number of filters to apply. This is the same as the output depth of the // result const TensorIndex kernelFilters = isColMajor ? out.dimensions()[0] : out.dimensions()[NumDims - 1]; // Number of channels. This is the same as the input depth. const TensorIndex kernelChannels = isColMajor ? in.dimensions()[0] : in.dimensions()[NumDims - 1]; // This is the effective kernel size, taking into account the (*_in_stride - // 1) zero-values // inserted between consecutive kernel elements in atrous convolution const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (row_in_stride - 1); const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (col_in_stride - 1); // Computing the forward padding const TensorIndex padRows = numext::maxi( 0, (outputRows - 1) * row_stride + kernelRowsEff - inputRows); const TensorIndex padCols = numext::maxi( 0, (outputCols - 1) * col_stride + kernelColsEff - inputCols); const TensorIndex padding_top = padRows / 2; const TensorIndex padding_bottom = padRows - padding_top; const TensorIndex padding_left = padCols / 2; const TensorIndex padding_right = padCols - padding_left; // Reshaped out DSizes output_dims; if (isColMajor) { output_dims[0] = kernelFilters; output_dims[1] = outputRows * outputCols; for (int i = 3; i < NumDims; ++i) { output_dims[1] *= out.dimension(i); } } else { output_dims[1] = kernelFilters; output_dims[0] = outputCols * outputRows; for (int i = 0; i < NumDims - 3; ++i) { output_dims[0] *= out.dimension(i); } } // Reshaped extract_image_patches(in) DSizes pre_contract_dims; if (isColMajor) { pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols; pre_contract_dims[1] = outputRows * outputCols; for (int i = 3; i < NumDims; ++i) { pre_contract_dims[1] *= in.dimension(i); } eigen_assert(output_dims[1] == pre_contract_dims[1]); } else { pre_contract_dims[1] = kernelCols * kernelRows * kernelChannels; pre_contract_dims[0] = outputRows * outputCols; for (int i = 0; i < NumDims - 3; ++i) { pre_contract_dims[0] *= in.dimension(i); } eigen_assert(output_dims[0] == pre_contract_dims[0]); } // We will contract along the collapsed dimension that contains the // outputCols, outputRows and OTHERS. array, 1> contract_dims; if (isColMajor) { // col-major: output_backward.contract(input.patches) contract_dims[0] = IndexPair(1, 1); } else { // row-major: input.patches.contract(output_backward) contract_dims[0] = IndexPair(0, 0); } // After the contraction, the kernel will have the desired shape // out_depth X in_shape X kernel_rows X kernel_cols DSizes kernel_dims; if (isColMajor) { kernel_dims[0] = kernelFilters; kernel_dims[1] = kernelChannels; kernel_dims[2] = kernelRows; kernel_dims[3] = kernelCols; } else { kernel_dims[3] = kernelFilters; kernel_dims[2] = kernelChannels; kernel_dims[1] = kernelRows; kernel_dims[0] = kernelCols; } return choose( Cond::Layout == ColMajor>(), output_backward.reshape(output_dims) .contract( input .extract_image_patches( kernelRows, kernelCols, row_stride, col_stride, row_in_stride, col_in_stride, 1, 1, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)) .reshape(pre_contract_dims), contract_dims) .reshape(kernel_dims), input .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride, col_in_stride, 1, 1, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)) .reshape(pre_contract_dims) .contract(output_backward.reshape(output_dims), contract_dims) .reshape(kernel_dims)); } } // end namespace Eigen #endif // TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_