#ifndef TENSORFLOW_KERNELS_OPS_UTIL_H_
#define TENSORFLOW_KERNELS_OPS_UTIL_H_

// This file contains utilities for various operations.

#include <string.h>

#include "tensorflow/core/public/status.h"
#include "tensorflow/core/public/tensor_shape.h"
#include "tensorflow/core/util/padding.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

namespace tensorflow {

// Call this function from a test if op kernels are not being
// registered. This can happen if the test is linked in a shared
// mode and has no direct references to any code from this directory.
void RequireDefaultOps();

// Get2dOutputSize(): Given an input tensor, kernel, stride and padding
// type, the function computes the output and padding dimensions.
//
// Convolution layers take in an input tensor of shape (D, C, R, B), and
// convolve it with a set of filters, which can also be presented as a
// tensor (D, K, K, M), where M is the number of filters, K is the filter size,
// and each 3-dimensional tensor of size (D, K, K) is a filter. For
// simplicity we assume that we always use square filters (which is usually the
// case in images). It also takes in a few additional parameters:
//
// Stride (S): the stride with which we apply the filters. This is the offset
// between locations where we apply the filters. A larger stride
// means that the output will be spatially smaller.
//
// Padding (P): the padding we apply to the input tensor along the R and C
// dimensions. This is usually used to make sure that the spatial dimensions
// do not shrink when we progress with convolutions. Two types of padding are
// often used:
//   SAME: the pad value is computed so that the output will have size R/S
//         and C/S.
//   VALID: no padding is carried out.
// The padded area is zero-filled.
//
// The output dimensions for convolution and many other operations, when given
// all the parameters above, are as follows:
// - When Padding = SAME: the output size is (B, R', C', M), where
//     R' = ceil(float(R) / float(S))
//     C' = ceil(float(C) / float(S))
//   where ceil is the ceiling function. The number of padded rows and columns
//   are computed as:
//     Pr = ((R' - 1) * S + K - R) / 2
//     Pc = ((C' - 1) * S + K - C) / 2
//   When the stride is 1, we have the simplified case
//     R' = R, C' = C, Pr = Pc = (K - 1) / 2.
//   This is where SAME comes from - the output has the same size as the input.
//
// - When Padding = VALID: the output size is computed as
//     R' = ceil(float(R - K + 1) / float(S))
//     C' = ceil(float(C - K + 1) / float(S))
//   and the number of padded rows and columns are computed in the same way.
//   When the stride is 1, we have the simplified case
//     R' = R - K + 1, C' = C - K + 1, Pr = 0, Pc = 0.
//
// For convolution, mathematically, the output value at location (b, r', c', m)
// is the inner product of two vectors: the chunk of input at
//   (b, (r'*S-Pr) : (r'*S-Pr+K), (c'*S-Pc) : (c'*S-Pc+K), :),
// and the filter at (m, :, :, :).
Status Get2dOutputSize(const int in_height, const int in_width,
                       int filter_height, int filter_width, int row_stride,
                       int col_stride, Padding padding, int* new_height,
                       int* new_width, int* pad_rows, int* pad_cols);
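
// Illustrative sketch (not part of the original API): 'ExampleGet2dOutputSizeSame'
// is a hypothetical helper showing how the SAME-padding formulas above map onto
// a Get2dOutputSize call. The sizes R = C = 28, K = 5, S = 2 are made-up example
// values; with them the formulas give
//   R' = C' = ceil(28 / 2.0) = 14
//   Pr = Pc = ((14 - 1) * 2 + 5 - 28) / 2 = 1.
inline Status ExampleGet2dOutputSizeSame(int* out_height, int* out_width,
                                         int* pad_rows, int* pad_cols) {
  return Get2dOutputSize(/*in_height=*/28, /*in_width=*/28,
                         /*filter_height=*/5, /*filter_width=*/5,
                         /*row_stride=*/2, /*col_stride=*/2, SAME, out_height,
                         out_width, pad_rows, pad_cols);
}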

// Returns the same output dimensions as in Get2dOutputSize, but returns verbose
// padding dimensions (top/bottom/left/right). Any excess padding (caused by
// an odd padding size value) is added to the 'pad_bottom' and 'pad_right'
// dimensions.
Status Get2dOutputSizeVerbose(const int in_height, const int in_width,
                              int filter_height, int filter_width,
                              int row_stride, int col_stride, Padding padding,
                              int* new_height, int* new_width, int* pad_top,
                              int* pad_bottom, int* pad_left, int* pad_right);

// Calculates broadcast starting index and size. For SAME padding, additional
// padding could be applied to right, left, top and bottom. Depending on the
// current index, input size, kernel size, stride, and padding size, the
// starting index and size for broadcast for that dimension are different from
// the current index and kernel size.
// This is mainly used by gradient algorithms for pooling operations.
Status GetBroadcastSize(const int index, const int in_size, const int ksize,
                        const int stride, const int pad_size, int* bindex,
                        int* bsize);

// Converts Brain's Padding to Eigen's PaddingType.
Eigen::PaddingType BrainPadding2EigenPadding(Padding padding);

// Given a shape 's' of a tensor of type T, returns true iff the number of
// bytes occupied by each dim 0 slice (i.e., &tensor(i + 1, ...) -
// &tensor(i, ...)) is a multiple of EIGEN_MAX_ALIGN_BYTES.
template <typename T>
bool IsInnerDimsSizeAligned(const TensorShape& s) {
  if (s.dims() == 0) return false;
  const int64 dim0_size = s.dim_size(0);
  if (dim0_size == 0) return false;
  const int64 bytes_per_dim0 = (s.num_elements() / dim0_size) * sizeof(T);
  return bytes_per_dim0 % EIGEN_MAX_ALIGN_BYTES == 0;
}

// Returns in 'col_data', image patches in storage order (height, width, depth)
// extracted from image at 'input_data', which is required to be in storage
// order (batch, height, width, depth).
// Implementation written by Yangqing Jia (jiayq).
template <typename T>
void Im2col(const T* input_data, const int depth, const int height,
            const int width, const int filter_h, const int filter_w,
            const int pad_t, const int pad_l, const int pad_b, const int pad_r,
            const int stride_h, const int stride_w, T* col_data) {
  int height_col = (height + pad_t + pad_b - filter_h) / stride_h + 1;
  int width_col = (width + pad_l + pad_r - filter_w) / stride_w + 1;

  int h_pad = -pad_t;
  for (int h = 0; h < height_col; ++h) {
    int w_pad = -pad_l;
    for (int w = 0; w < width_col; ++w) {
      for (int ih = h_pad; ih < h_pad + filter_h; ++ih) {
        for (int iw = w_pad; iw < w_pad + filter_w; ++iw) {
          if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
            memcpy(col_data, input_data + (ih * width + iw) * depth,
                   sizeof(T) * depth);
          } else {
            // This should be simply padded with zero.
            memset(col_data, 0, sizeof(T) * depth);
          }
          col_data += depth;
        }
      }
      w_pad += stride_w;
    }
    h_pad += stride_h;
  }
}
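
// Illustrative sketch (not part of the original API): 'ExampleIm2col' is a
// hypothetical helper that extracts all 2x2 patches from a single 3x3, depth-1
// image with stride 1 and no padding. Here height_col = width_col = 2, so
// 'patches' must hold 2 * 2 * 2 * 2 * 1 = 16 values, laid out as
// (out_h, out_w, filter_h, filter_w, depth).
inline void ExampleIm2col(const float* image /* 3 x 3 x 1 values */,
                          float* patches /* 16 values */) {
  Im2col<float>(image, /*depth=*/1, /*height=*/3, /*width=*/3,
                /*filter_h=*/2, /*filter_w=*/2, /*pad_t=*/0, /*pad_l=*/0,
                /*pad_b=*/0, /*pad_r=*/0, /*stride_h=*/1, /*stride_w=*/1,
                patches);
}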

// Returns in 'im_data' image patch in storage order (height, width, depth),
// constructed from patches in 'col_data', which is required to be in storage
// order (out_height * out_width, filter_height, filter_width, in_depth).
// Implementation by Yangqing Jia (jiayq).
template <typename T>
void Col2im(const T* col_data, const int depth, const int height,
            const int width, const int filter_h, const int filter_w,
            const int pad_t, const int pad_l, const int pad_b, const int pad_r,
            const int stride_h, const int stride_w, T* im_data) {
  memset(im_data, 0, sizeof(T) * height * width * depth);
  int height_col = (height + pad_t + pad_b - filter_h) / stride_h + 1;
  int width_col = (width + pad_l + pad_r - filter_w) / stride_w + 1;
  int h_pad = -pad_t;
  for (int h = 0; h < height_col; ++h) {
    int w_pad = -pad_l;
    for (int w = 0; w < width_col; ++w) {
      T* im_patch_data = im_data + (h_pad * width + w_pad) * depth;
      for (int ih = h_pad; ih < h_pad + filter_h; ++ih) {
        for (int iw = w_pad; iw < w_pad + filter_w; ++iw) {
          if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
            // TODO(andydavis) Vectorize this loop (if compiler does not).
            for (int i = 0; i < depth; ++i) {
              im_patch_data[i] += col_data[i];
            }
          }
          im_patch_data += depth;
          col_data += depth;
        }
        // Jump to the start of the next image row within the patch.
        im_patch_data += depth * (width - filter_w);
      }
      w_pad += stride_w;
    }
    h_pad += stride_h;
  }
}

}  // namespace tensorflow

#endif  // TENSORFLOW_KERNELS_OPS_UTIL_H_