#ifndef TENSORFLOW_KERNELS_OPS_UTIL_H_
#define TENSORFLOW_KERNELS_OPS_UTIL_H_

// This file contains utilities for various operations.

#include "tensorflow/core/public/status.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/public/tensor_shape.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

namespace tensorflow {

// Call this function from a test if op kernels are not being
// registered.  This can happen if the test is linked in a shared
// mode and has no direct references to any code from this directory.
void RequireDefaultOps();

// Get2dOutputSize(): Given an input tensor, kernel, stride and padding
// type, the function computes the output and padding dimensions.
//
// Convolution layers take in an input tensor of shape (D, C, R, B), and
// convolve it with a set of filters, which can also be presented as a
// tensor (D, K, K, M), where M is the number of filters, K is the filter size,
// and each 3-dimensional tensor of size (D, K, K) is a filter. For
// simplicity we assume that we always use square filters (which is usually the
// case in images). It also takes in a few additional parameters:
//
// Stride (S): the stride with which we apply the filters. This is the offset
// between locations where we apply the filters. A larger stride
// means that the output will be spatially smaller.
//
// Padding (P): the padding we apply to the input tensor along the R and C
// dimensions. This is usually used to make sure that the spatial dimensions
// do not shrink when we progress with convolutions. Two types of padding are
// often used:
//   SAME: the pad value is computed so that the output will have size
//         ceil(R/S) and ceil(C/S).
//   VALID: no padding is carried out.
// The padded area is zero-filled.
//
// The output dimensions for convolution and many other operations, when given
// all the parameters above, are as follows:
// - When Padding = SAME: the output size is (B, R', C', M), where
//     R' = ceil(float(R) / float(S))
//     C' = ceil(float(C) / float(S))
//   where ceil is the ceiling function. The number of padded rows and columns
//   are computed as:
//     Pr = ((R' - 1) * S + K - R) / 2
//     Pc = ((C' - 1) * S + K - C) / 2
//   When the stride is 1, we have the simplified case
//     R'=R, C'=C, Pr=Pc=(K-1)/2.
//   This is where the name SAME comes from: the output has the same size as
//   the input.
//
// - When Padding = VALID: the output size is computed as
//     R' = ceil(float(R - K + 1) / float(S))
//     C' = ceil(float(C - K + 1) / float(S))
//   and no rows or columns are padded, i.e. Pr = Pc = 0. When the stride is
//   1, we have the simplified case
//     R'=R-K+1, C'=C-K+1, Pr=0, Pc=0.
//
// For convolution, mathematically, the output value at location (b, r', c', m)
// is the inner product of two vectors: the chunk of input at
//    (b, (r'*S-Pr) : (r'*S-Pr+K), (c'*S-Pc) : (c'*S-Pc+K), :),
// and the filter at (m, :, :, :).
//
Status Get2dOutputSize(const int in_height, const int in_width,
                       int filter_height, int filter_width, int row_stride,
                       int col_stride, Padding padding, int* new_height,
                       int* new_width, int* pad_rows, int* pad_cols);
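
// Example (illustrative sketch, not part of the original interface): computing
// the SAME-padded output size for a 28x28 input with a 3x3 filter and
// stride 2. The variable names below are hypothetical.
//
//   int out_height, out_width, pad_rows, pad_cols;
//   Status s = Get2dOutputSize(/*in_height=*/28, /*in_width=*/28,
//                              /*filter_height=*/3, /*filter_width=*/3,
//                              /*row_stride=*/2, /*col_stride=*/2, SAME,
//                              &out_height, &out_width, &pad_rows, &pad_cols);
//   // SAME: out_height = out_width = ceil(28 / 2) = 14, and
//   // pad_rows = pad_cols = ((14 - 1) * 2 + 3 - 28) / 2 = 0 (1 / 2 rounds
//   // down; the odd leftover row/column is reported by the Verbose variant).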

// Returns the same output dimensions as Get2dOutputSize, but with verbose
// padding dimensions (top/bottom/left/right). Any excess padding (caused by
// an odd total padding size) is added to the 'pad_bottom' and 'pad_right'
// dimensions.
Status Get2dOutputSizeVerbose(const int in_height, const int in_width,
                              int filter_height, int filter_width,
                              int row_stride, int col_stride, Padding padding,
                              int* new_height, int* new_width, int* pad_top,
                              int* pad_bottom, int* pad_left, int* pad_right);
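
// For example, following the formulas above: with in_height = 4,
// filter_height = 3, row_stride = 2 and SAME padding, the output height is
// ceil(4 / 2) = 2 and the total vertical padding is (2 - 1) * 2 + 3 - 4 = 1
// row; the verbose variant reports this as pad_top = 0 and pad_bottom = 1,
// while Get2dOutputSize would report pad_rows = 0.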

// Calculates broadcast starting index and size.  For SAME padding, additional
// padding may be applied to the right, left, top and bottom.  Depending on the
// current index, input size, kernel size, stride, and padding size, the
// starting index and size of the broadcast region for that dimension can
// differ from the current index and kernel size.
// This is mainly used by gradient algorithms for pooling operations.
Status GetBroadcastSize(const int index, const int in_size, const int ksize,
                        const int stride, const int pad_size, int* bindex,
                        int* bsize);
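
// For example (an illustrative reading of the description above): for a
// pooling op with in_size = 5, ksize = 3, stride = 2 and pad_size = 1, the
// output element at index = 0 is pooled over a window that starts in the left
// padding, so its broadcast region is clipped to the input and one would
// expect bindex = 0 and bsize = 2 (only input positions 0 and 1 receive
// gradient from that output element).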

// Converts Brain's Padding to Eigen's PaddingType.
Eigen::PaddingType BrainPadding2EigenPadding(Padding padding);

// Given a shape 's' of a tensor of type T, returns true iff the number of
// bytes occupied by each dim-0 slice (i.e., &tensor(i + 1, ...) -
// &tensor(i, ...)) is a multiple of EIGEN_MAX_ALIGN_BYTES.
template <typename T>
bool IsInnerDimsSizeAligned(const TensorShape& s) {
  if (s.dims() == 0) return false;
  const int64 dim0_size = s.dim_size(0);
  if (dim0_size == 0) return false;
  const int64 bytes_per_dim0 = (s.num_elements() / dim0_size) * sizeof(T);
  return bytes_per_dim0 % EIGEN_MAX_ALIGN_BYTES == 0;
}
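
// Example (illustrative sketch; the concrete value of EIGEN_MAX_ALIGN_BYTES
// depends on the build, 16 is assumed here):
//
//   TensorShape aligned({4, 4});    // 4 * sizeof(float) = 16 bytes per row.
//   TensorShape unaligned({4, 3});  // 3 * sizeof(float) = 12 bytes per row.
//   IsInnerDimsSizeAligned<float>(aligned);    // true  (16 % 16 == 0)
//   IsInnerDimsSizeAligned<float>(unaligned);  // false (12 % 16 != 0)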

// Returns in 'col_data', image patches in storage order (height, width, depth)
// extracted from image at 'input_data', which is required to be in storage
// order (batch, height, width, depth).
// Implementation written by Yangqing Jia (jiayq).
template <typename T>
void Im2col(const T* input_data, const int depth, const int height,
            const int width, const int filter_h, const int filter_w,
            const int pad_t, const int pad_l, const int pad_b, const int pad_r,
            const int stride_h, const int stride_w, T* col_data) {
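  // Number of patch positions along the height and width dimensions.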
  int height_col = (height + pad_t + pad_b - filter_h) / stride_h + 1;
  int width_col = (width + pad_l + pad_r - filter_w) / stride_w + 1;

  int h_pad = -pad_t;
  for (int h = 0; h < height_col; ++h) {
    int w_pad = -pad_l;
    for (int w = 0; w < width_col; ++w) {
      for (int ih = h_pad; ih < h_pad + filter_h; ++ih) {
        for (int iw = w_pad; iw < w_pad + filter_w; ++iw) {
          if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
            memcpy(col_data, input_data + (ih * width + iw) * depth,
                   sizeof(T) * depth);
          } else {
            // This element lies in the padding region, so zero-fill it.
            memset(col_data, 0, sizeof(T) * depth);
          }
          col_data += depth;
        }
      }
      w_pad += stride_w;
    }
    h_pad += stride_h;
  }
}
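
// Example (illustrative sketch, not part of the original header): extracting
// all 3x3 patches, with stride 1 and no padding, from a single-channel 5x5
// image. The buffer sizes follow from the loop bounds above:
// height_col = width_col = (5 - 3) / 1 + 1 = 3, so 'col' needs
// 3 * 3 * 3 * 3 * 1 = 81 elements.
//
//   std::vector<float> image(5 * 5 * 1);
//   std::vector<float> col(3 * 3 * 3 * 3 * 1);
//   Im2col<float>(image.data(), /*depth=*/1, /*height=*/5, /*width=*/5,
//                 /*filter_h=*/3, /*filter_w=*/3, /*pad_t=*/0, /*pad_l=*/0,
//                 /*pad_b=*/0, /*pad_r=*/0, /*stride_h=*/1, /*stride_w=*/1,
//                 col.data());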

// Returns in 'im_data' the image in storage order (height, width, depth),
// constructed by accumulating the patches in 'col_data', which is required to
// be in storage order (out_height * out_width, filter_height, filter_width,
// in_depth).
// Implementation by Yangqing Jia (jiayq).
template <typename T>
void Col2im(const T* col_data, const int depth, const int height,
            const int width, const int filter_h, const int filter_w,
            const int pad_t, const int pad_l, const int pad_b, const int pad_r,
            const int stride_h, const int stride_w, T* im_data) {
  memset(im_data, 0, sizeof(T) * height * width * depth);
  int height_col = (height + pad_t + pad_b - filter_h) / stride_h + 1;
  int width_col = (width + pad_l + pad_r - filter_w) / stride_w + 1;
  int h_pad = -pad_t;
  for (int h = 0; h < height_col; ++h) {
    int w_pad = -pad_l;
    for (int w = 0; w < width_col; ++w) {
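      // im_patch_data points at the image element under the top-left corner
      // of the current patch; this position may fall in the padding region,
      // and out-of-bounds elements are skipped by the bounds check below.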
      T* im_patch_data = im_data + (h_pad * width + w_pad) * depth;
      for (int ih = h_pad; ih < h_pad + filter_h; ++ih) {
        for (int iw = w_pad; iw < w_pad + filter_w; ++iw) {
          if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
            // TODO(andydavis) Vectorize this loop (if compiler does not).
            for (int i = 0; i < depth; ++i) {
              im_patch_data[i] += col_data[i];
            }
          }
          im_patch_data += depth;
          col_data += depth;
        }
        // Skip the rest of the image row to reach the next patch row.
        im_patch_data += depth * (width - filter_w);
      }
      w_pad += stride_w;
    }
    h_pad += stride_h;
  }
}
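
// Example (illustrative sketch, mirroring the Im2col example above): folding
// the 81-element 'col' buffer back into a single-channel 5x5 image. Pixels
// covered by several overlapping patches accumulate the sum of their copies,
// which is the behavior needed when back-propagating convolution gradients.
//
//   std::vector<float> image_grad(5 * 5 * 1);
//   Col2im<float>(col.data(), /*depth=*/1, /*height=*/5, /*width=*/5,
//                 /*filter_h=*/3, /*filter_w=*/3, /*pad_t=*/0, /*pad_l=*/0,
//                 /*pad_b=*/0, /*pad_r=*/0, /*stride_h=*/1, /*stride_w=*/1,
//                 image_grad.data());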

}  // namespace tensorflow

#endif  // TENSORFLOW_KERNELS_OPS_UTIL_H_