diff options
Diffstat (limited to 'tensorflow/core/kernels/pooling_ops_common.h')
-rw-r--r-- | tensorflow/core/kernels/pooling_ops_common.h | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/pooling_ops_common.h b/tensorflow/core/kernels/pooling_ops_common.h new file mode 100644 index 0000000000..5bf44b6e40 --- /dev/null +++ b/tensorflow/core/kernels/pooling_ops_common.h @@ -0,0 +1,264 @@ +#ifndef TENSORFLOW_KERNELS_POOLING_OPS_COMMON_H_ +#define TENSORFLOW_KERNELS_POOLING_OPS_COMMON_H_ + +#include <vector> + +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/avgpooling_op.h" +#include "tensorflow/core/kernels/maxpooling_op.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/public/tensor_shape.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +// A helper class to manage sizes and shapes for pooling operations. +struct PoolParameters { + // Updates context->status if there is an invalid input. + PoolParameters(OpKernelContext* context, const std::vector<int32>& ksize, + const std::vector<int32>& stride, Padding padding, + const TensorShape& tensor_in_shape); + + // Returns the shape of the output for "forward" pooling operations. + TensorShape forward_output_shape(); + + int depth; + + int tensor_in_cols; + int tensor_in_rows; + int tensor_in_batch; + + int window_rows; + int window_cols; + int depth_window; + + int row_stride; + int col_stride; + int depth_stride; + + int out_height; + int out_width; + int out_depth; + + int pad_rows; + int pad_cols; + int pad_depth; +}; + +// An implementation of MaxPooling (forward). +template <typename Device, typename T> +class MaxPoolingOp : public UnaryOp<T> { + public: + explicit MaxPoolingOp(OpKernelConstruction* context) : UnaryOp<T>(context) { + OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_)); + OP_REQUIRES(context, ksize_.size() == 4, + errors::InvalidArgument("Sliding window ksize field must " + "specify 4 dimensions")); + OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); + OP_REQUIRES(context, stride_.size() == 4, + errors::InvalidArgument("Sliding window stride field must " + "specify 4 dimensions")); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, + errors::Unimplemented( + "Pooling is not yet supported on the batch dimension.")); + } + + void Compute(OpKernelContext* context) override { + const Tensor& tensor_in = context->input(0); + PoolParameters params{context, ksize_, stride_, padding_, + tensor_in.shape()}; + if (!context->status().ok()) { + return; + } + + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output( + 0, params.forward_output_shape(), &output)); + + if (params.depth_window > 1) { + DepthwiseMaxPool(context, output, tensor_in, params); + } else { + SpatialMaxPool(context, output, tensor_in, params, padding_); + } + } + + private: + // Single-threaded implementation of DepthwiseMaxPool which + // does not handle all of the same options as SpatialMaxPool + // (strict assumptions on no padding, stride). + // + // TODO(vrv): implement a more general depthwise-max pool that works + // on GPU as well. + void DepthwiseMaxPool(OpKernelContext* context, Tensor* output, + const Tensor& tensor_in, const PoolParameters& params) { + Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> + in_by_pool(tensor_in.flat<T>().data(), params.depth_window, + tensor_in.NumElements() / params.depth_window); + Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> out_by_pool( + output->flat<T>().data(), 1, output->NumElements()); + out_by_pool = in_by_pool.colwise().maxCoeff(); + } + + void SpatialMaxPool(OpKernelContext* context, Tensor* output, + const Tensor& tensor_in, const PoolParameters& params, + const Padding& padding) { + // On GPU, use Eigen's Spatial Max Pooling. On CPU, use an + // EigenMatrix version that is currently faster than Eigen's + // Spatial MaxPooling implementation. + // + // TODO(vrv): Remove this once we no longer need it. + if (std::is_same<Device, GPUDevice>::value) { + Eigen::PaddingType pt = BrainPadding2EigenPadding(padding); + functor::SpatialMaxPooling<Device, T>()( + context->eigen_device<Device>(), output->tensor<T, 4>(), + tensor_in.tensor<T, 4>(), params.window_rows, params.window_cols, + params.row_stride, params.col_stride, pt); + } else { + typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> + ConstEigenMatrixMap; + typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> + EigenMatrixMap; + + ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), params.depth, + params.tensor_in_cols * params.tensor_in_rows * + params.tensor_in_batch); + EigenMatrixMap out_mat( + output->flat<T>().data(), params.depth, + params.out_width * params.out_height * params.tensor_in_batch); + + // Initializes the output tensor with MIN<T>. + output->flat<T>().setConstant(Eigen::NumTraits<T>::lowest()); + + // The following code basically does the following: + // 1. Flattens the input and output tensors into two dimensional arrays. + // tensor_in_as_matrix: + // depth by (tensor_in_cols * tensor_in_rows * tensor_in_batch) + // output_as_matrix: + // depth by (out_width * out_height * tensor_in_batch) + // + // 2. Walks through the set of columns in the flattened + // tensor_in_as_matrix, + // and updates the corresponding column(s) in output_as_matrix with the + // max value. + for (int b = 0; b < params.tensor_in_batch; ++b) { + for (int h = 0; h < params.tensor_in_rows; ++h) { + for (int w = 0; w < params.tensor_in_cols; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + const int hpad = h + params.pad_rows; + const int wpad = w + params.pad_cols; + const int h_start = + (hpad < params.window_rows) + ? 0 + : (hpad - params.window_rows) / params.row_stride + 1; + const int h_end = + std::min(hpad / params.row_stride + 1, params.out_height); + const int w_start = + (wpad < params.window_cols) + ? 0 + : (wpad - params.window_cols) / params.col_stride + 1; + const int w_end = + std::min(wpad / params.col_stride + 1, params.out_width); + // compute elementwise max + const int in_offset = + (b * params.tensor_in_rows + h) * params.tensor_in_cols + w; + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + const int out_offset = + (b * params.out_height + ph) * params.out_width + pw; + out_mat.col(out_offset) = + out_mat.col(out_offset).cwiseMax(in_mat.col(in_offset)); + } + } + } + } + } + } + } + + std::vector<int32> ksize_; + std::vector<int32> stride_; + Padding padding_; +}; + +template <typename Device, typename T> +void SpatialAvgPool(OpKernelContext* context, Tensor* output, + const Tensor& input, const PoolParameters& params, + const Padding& padding) { + typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> + ConstEigenMatrixMap; + typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> + EigenMatrixMap; + + auto in_flat = input.flat<T>(); + auto out_flat = output->flat<T>(); + + ConstEigenMatrixMap in_mat( + in_flat.data(), params.depth, + params.tensor_in_cols * params.tensor_in_rows * params.tensor_in_batch); + EigenMatrixMap out_mat( + out_flat.data(), params.depth, + params.out_width * params.out_height * params.tensor_in_batch); + Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols()); + out_count.setZero(); + + // Initializes output to zero. + out_flat.setZero(); + + // The following code basically does the following: + // 1. Flattens the input and output tensors into two dimensional arrays. + // tensor_in_as_matrix: + // depth by (tensor_in_cols * tensor_in_rows * tensor_in_batch) + // output_as_matrix: + // depth by (out_width * out_height * tensor_in_batch) + // + // 2. Walks through the set of columns in the flattened + // tensor_in_as_matrix, + // and updates the corresponding column(s) in output_as_matrix with the + // average value. + for (int b = 0; b < params.tensor_in_batch; ++b) { + for (int h = 0; h < params.tensor_in_rows; ++h) { + for (int w = 0; w < params.tensor_in_cols; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + const int hpad = h + params.pad_rows; + const int wpad = w + params.pad_cols; + const int h_start = + (hpad < params.window_rows) + ? 0 + : (hpad - params.window_rows) / params.row_stride + 1; + const int h_end = + std::min(hpad / params.row_stride + 1, params.out_height); + const int w_start = + (wpad < params.window_cols) + ? 0 + : (wpad - params.window_cols) / params.col_stride + 1; + const int w_end = + std::min(wpad / params.col_stride + 1, params.out_width); + const int in_offset = + (b * params.tensor_in_rows + h) * params.tensor_in_cols + w; + Eigen::DSizes<ptrdiff_t, 2> in_indices(0, in_offset); + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + const int out_offset = + (b * params.out_height + ph) * params.out_width + pw; + out_mat.col(out_offset) += in_mat.col(in_offset); + out_count(out_offset)++; + } + } + } + } + } + DCHECK_GT(out_count.minCoeff(), 0); + out_mat.array().rowwise() /= out_count.transpose().array(); +} + +} // namespace tensorflow + +#endif // TENSORFLOW_KERNELS_POOLING_OPS_COMMON_H_ |