diff options
Diffstat (limited to 'tensorflow/core/kernels/ops_util.h')
-rw-r--r-- | tensorflow/core/kernels/ops_util.h | 180 |
1 file changed, 180 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h new file mode 100644 index 0000000000..283338f8df --- /dev/null +++ b/tensorflow/core/kernels/ops_util.h @@ -0,0 +1,180 @@ +#ifndef TENSORFLOW_KERNELS_OPS_UTIL_H_ +#define TENSORFLOW_KERNELS_OPS_UTIL_H_ + +// This file contains utilities for various operations. + +#include "tensorflow/core/public/status.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/public/tensor_shape.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +// Call this function from a test if op kernels are not being +// registered. This can happen if the test is linked in a shared +// mode and has no direct references to any code from this directory. +void RequireDefaultOps(); + +// Get2dOutputSize(): Given an input tensor, kernel, stride and padding +// type, the function computes the output and padding dimensions. +// +// Convolution layers take in an input tensor of shape (D, C, R, B), and +// convolve it with a set of filters, which can also be presented as a +// tensor (D, K, K, M), where M is the number of filters, K is the filter size, +// and each 3-dimensional tensor of size (D, K, K) is a filter. For +// simplicity we assume that we always use square filters (which is usually the +// case in images). It also takes in a few additional parameters: +// +// Stride (S): the stride with which we apply the filters. This is the offset +// between locations where we apply the filters. A larger stride +// means that the output will be spatially smaller. +// +// Padding (P): the padding we apply to the input tensor along the R and C +// dimensions. This is usually used to make sure that the spatial dimension +// do not shrink when we progress with convolutions. Two types of padding are +// often used: +// SAME: the pad value is computed so that the output will have size R/S +// and C/S. +// VALID: no padding is carried out. 
+// The padded area is zero-filled. +// +// The output dimensions for convolution and many other operations, when given +// all the parameters above, are as follows: +// - When Padding = SAME: the output size is (B, R', C', M), where +// R' = ceil(float(R) / float(S)) +// C' = ceil(float(C) / float(S)) +// where ceil is the ceiling function. The number of padded rows and columns +// are computed as: +// Pr = ((R' - 1) * S + K - R) / 2 +// Pc = ((C' - 1) * S + K - C) / 2 +// When the stride is 1, we have the simplified case +// R'=R, C'=C, Pr=Pc=(K-1)/2. +// This is where SAME comes from - the output has the same size as the input +// has. +// +// - When Padding = VALID: the output size is computed as +// R' = ceil(float(R - K + 1) / float(S)) +// C' = ceil(float(C - K + 1) / float(S)) +// and the number of padded rows and columns are computed in the same way. +// When the stride is 1, we have the simplified case +// R'=R-K+1, C'=C-K+1, Pr=0, Pc=0. +// +// For convolution, mathematically, the output value at location (b, r', c', m) +// is the inner product of two vectors: the chunk of input at +// (b, (r'*S-Pr) : (r'*S-Pr+K), (c'*S-Pc) : (c'*S-Pc+K), :), +// and the filter at (m, :, :, :). +// +Status Get2dOutputSize(const int in_height, const int in_width, + int filter_height, int filter_width, int row_stride, + int col_stride, Padding padding, int* new_height, + int* new_width, int* pad_rows, int* pad_cols); + +// Returns the same output dimensions as in Get2dOutputSize, but returns verbose +// padding dimensions (top/bottom/left/right). Any excess padding (caused by +// an odd padding size value) is added to the 'pad_bottom' and 'pad_right' +// dimensions. 
Status Get2dOutputSizeVerbose(const int in_height, const int in_width,
                              int filter_height, int filter_width,
                              int row_stride, int col_stride, Padding padding,
                              int* new_height, int* new_width, int* pad_top,
                              int* pad_bottom, int* pad_left, int* pad_right);

// Calculates broadcast starting index and size. For SAME padding, additional
// padding could be applied to right, left, top and bottom. Depending on the
// current index, input size, kernel size, stride, padding size, the starting
// index and size for broadcast for that dimension are different from the
// current index and kernel size.
// This is mainly used by gradient algorithms for pooling operations.
Status GetBroadcastSize(const int index, const int in_size, const int ksize,
                        const int stride, const int pad_size, int* bindex,
                        int* bsize);

// Converts Brain's Padding to Eigen's PaddingType.
Eigen::PaddingType BrainPadding2EigenPadding(Padding padding);

// Given a shape 's' of a tensor of type T. Returns true iff the
// number of bytes occupied by each dim 0 slice (i.e., &tensor(i + 1, ...) -
// &tensor(i, ...)) is a multiple of EIGEN_MAX_ALIGN_BYTES.
template <typename T>
bool IsInnerDimsSizeAligned(const TensorShape& s) {
  // A rank-0 (scalar) shape has no inner dims to measure.
  if (s.dims() == 0) return false;
  const int64 dim0_size = s.dim_size(0);
  // Avoid division by zero for shapes with an empty outer dimension.
  if (dim0_size == 0) return false;
  // Bytes per dim-0 slice = (elements per slice) * element size.
  const int64 bytes_per_dim0 = (s.num_elements() / dim0_size) * sizeof(T);
  return bytes_per_dim0 % EIGEN_MAX_ALIGN_BYTES == 0;
}

// Returns in 'col_data', image patches in storage order (height, width, depth)
// extracted from image at 'input_data', which is required to be in storage
// order (batch, height, width, depth).
// Implementation written by Yangqing Jia (jiayq).
template <typename T>
void Im2col(const T* input_data, const int depth, const int height,
            const int width, const int filter_h, const int filter_w,
            const int pad_t, const int pad_l, const int pad_b, const int pad_r,
            const int stride_h, const int stride_w, T* col_data) {
  // Number of patch positions along each spatial dimension.
  const int out_rows = (height + pad_t + pad_b - filter_h) / stride_h + 1;
  const int out_cols = (width + pad_l + pad_r - filter_w) / stride_w + 1;
  for (int pr = 0; pr < out_rows; ++pr) {
    // Top-left corner of this patch in image coordinates; negative values
    // (or values past the image edge) fall in the padded region.
    const int base_r = pr * stride_h - pad_t;
    for (int pc = 0; pc < out_cols; ++pc) {
      const int base_c = pc * stride_w - pad_l;
      for (int r = base_r; r < base_r + filter_h; ++r) {
        for (int c = base_c; c < base_c + filter_w; ++c) {
          const bool inside =
              r >= 0 && r < height && c >= 0 && c < width;
          if (inside) {
            // Copy one depth-vector of the source image into the patch.
            memcpy(col_data, input_data + (r * width + c) * depth,
                   sizeof(T) * depth);
          } else {
            // Padded locations are zero-filled.
            memset(col_data, 0, sizeof(T) * depth);
          }
          col_data += depth;
        }
      }
    }
  }
}

// Returns in 'im_data' image patch in storage order (height, width, depth),
// constructed from patches in 'col_data', which is required to be in storage
// order (out_height * out_width, filter_height, filter_width, in_depth).
// Implementation by Yangqing Jia (jiayq).
+template <typename T> +void Col2im(const T* col_data, const int depth, const int height, + const int width, const int filter_h, const int filter_w, + const int pad_t, const int pad_l, const int pad_b, const int pad_r, + const int stride_h, const int stride_w, T* im_data) { + memset(im_data, 0, sizeof(T) * height * width * depth); + int height_col = (height + pad_t + pad_b - filter_h) / stride_h + 1; + int width_col = (width + pad_l + pad_r - filter_w) / stride_w + 1; + int h_pad = -pad_t; + for (int h = 0; h < height_col; ++h) { + int w_pad = -pad_l; + for (int w = 0; w < width_col; ++w) { + T* im_patch_data = im_data + (h_pad * width + w_pad) * depth; + for (int ih = h_pad; ih < h_pad + filter_h; ++ih) { + for (int iw = w_pad; iw < w_pad + filter_w; ++iw) { + if (ih >= 0 && ih < height && iw >= 0 && iw < width) { + // TODO(andydavis) Vectorize this loop (if compiler does not). + for (int i = 0; i < depth; ++i) { + im_patch_data[i] += col_data[i]; + } + } + im_patch_data += depth; + col_data += depth; + } + // Jump over remaining number of depth. + im_patch_data += depth * (width - filter_w); + } + w_pad += stride_w; + } + h_pad += stride_h; + } +} + +} // namespace tensorflow + +#endif // TENSORFLOW_KERNELS_OPS_UTIL_H_ |