author    | 2017-03-23 13:13:49 -0700
committer | 2017-03-23 13:13:49 -0700
commit    | fe97705b706c9dcd36586b6158e30758346c6afd (patch)
tree      | 6dd25ad6e4f5c7288c02bfdad2aa8725a4762d64 /tensorflow/core/kernels/mkl_pooling_ops_common.cc
parent    | 5b4a597b088344ff55c917a505eacefe605737aa (diff)
MKL support for max/avg pooling and ReLU (#8296)
* Adding MKL support for Max/Avg Pooling and ReLU
* Missed the mkl layer registry files
* Fixed sanity check errors with buildifier
* Adding Intel Conv2D kernel implementation along with required graph passes
This commit contains 4 main components:
1) Intel-optimized kernel implementation for Conv2D op
Implementation in kernels/mkl_conv_ops.*
2) Graph passes required to enable Conv2D optimized implementation
Implementation in graph/mkl_*. We also need a new op, MklToTf op.
Its implementation is in kernels/mkl_tfconv_op.cc.
3) Utility functions used in kernel implementation
Implementation is in common_runtime/mkl_layer_registry* and util/mkl_util.h
4) BUILD changes for Conv2D, graph passes and utility functions
* Refactored MKL convolution forward-pass computation into smaller functions. Changed configure to point to the newer MKLML library
* Moved MKL helper data structures and routines to private class members
* MKL op registration changed to use the existing op registry (nhasabni); a registration sketch follows the commit message below
* Fixed buildifier error
* Removed the mkl layer registry (should not have been added) and made fixes according to the code review comments
* Fixed rebase mess-ups
* Added documentation for MKL pooling op parameters
* Removed layer registry reference from MKL ReLU op
Diffstat (limited to 'tensorflow/core/kernels/mkl_pooling_ops_common.cc')
-rw-r--r-- | tensorflow/core/kernels/mkl_pooling_ops_common.cc | 166
1 file changed, 166 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
new file mode 100644
index 0000000000..3eb472d7e3
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
@@ -0,0 +1,166 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+#include <vector>
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
+#include "tensorflow/core/common_runtime/device.h"
+
+namespace tensorflow {
+
+  // Initialization for TensorFlow format
+  void MklPoolParameters::Init(OpKernelContext* context,
+                               const std::vector<int32>& ksize,
+                               const std::vector<int32>& stride,
+                               Padding padding,
+                               TensorFormat data_format,
+                               const TensorShape& tensor_in_shape) {
+    // For maxpooling, tensor_in should have 4 dimensions.
+    OP_REQUIRES(context, tensor_in_shape.dims() == 4,
+                errors::InvalidArgument("tensor_in must be 4-dimensional"));
+
+    depth = GetTensorDim(tensor_in_shape, data_format, 'C');
+    tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, 'W');
+    tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, 'H');
+    tensor_in_batch = GetTensorDim(tensor_in_shape, data_format, 'N');
+
+    Init(context, ksize, stride, padding, data_format);
+  }
+
+  // Initialization for MKL format
+  void MklPoolParameters::Init(OpKernelContext* context,
+                               const std::vector<int32>& ksize,
+                               const std::vector<int32>& stride,
+                               Padding padding,
+                               TensorFormat data_format,
+                               const MklShape* mklInputShape) {
+    // Get the input sizes
+    depth = mklInputShape->GetSizes()[2];
+    tensor_in_cols = mklInputShape->GetSizes()[0];
+    tensor_in_rows = mklInputShape->GetSizes()[1];
+    tensor_in_batch = mklInputShape->GetSizes()[3];
+
+    Init(context, ksize, stride, padding, data_format);
+  }
+
+  // Common Initialization for TensorFlow and MKL formats
+  void MklPoolParameters::Init(OpKernelContext* context,
+                               const std::vector<int32>& ksize,
+                               const std::vector<int32>& stride,
+                               Padding padding,
+                               TensorFormat data_format) {
+    // Get the data format
+    this->data_format = data_format;
+
+    // Get the output sizes
+    window_rows = GetTensorDim(ksize, data_format, 'H');
+    window_cols = GetTensorDim(ksize, data_format, 'W');
+    depth_window = GetTensorDim(ksize, data_format, 'C');
+
+    // Get the strides
+    row_stride = GetTensorDim(stride, data_format, 'H');
+    col_stride = GetTensorDim(stride, data_format, 'W');
+    depth_stride = GetTensorDim(stride, data_format, 'C');
+
+    // We only support 2D pooling across width/height and depthwise
+    // pooling, not a combination.
+    OP_REQUIRES(context,
+                (depth_window == 1 || (window_rows == 1 && window_cols == 1)),
+                errors::Unimplemented(
+                    "MaxPooling supports exactly one of pooling across depth "
+                    "or pooling across width/height."));
+
+    if (depth_window == 1) {
+      OP_REQUIRES_OK(context,
+                     GetWindowedOutputSizeVerbose(tensor_in_rows,
+                                                  window_rows,
+                                                  row_stride,
+                                                  padding,
+                                                  &out_height,
+                                                  &pad_top,
+                                                  &pad_bottom));
+
+      OP_REQUIRES_OK(context,
+                     GetWindowedOutputSizeVerbose(tensor_in_cols,
+                                                  window_cols,
+                                                  col_stride,
+                                                  padding,
+                                                  &out_width,
+                                                  &pad_left,
+                                                  &pad_right));
+    } else {
+      // Our current version of depthwise max pooling does not support
+      // any padding, and expects the depth_window to equal the depth
+      // stride (no overlapping).
+      OP_REQUIRES(context, depth % depth_window == 0,
+                  errors::Unimplemented("Depthwise max pooling requires the"
+                                        " depth window to evenly divide the"
+                                        " input depth"));
+      OP_REQUIRES(context, depth_stride == depth_window,
+                  errors::Unimplemented("Depthwise max pooling requires the"
+                                        " depth window to equal the depth"
+                                        " stride"));
+
+      // The current version of depthwise max is only implemented on CPU.
+      OP_REQUIRES(context,
+                  (DeviceType(static_cast<Device*>(context->device())
+                                  ->attributes()
+                                  .device_type()) == DeviceType(DEVICE_CPU)),
+                  errors::Unimplemented("Depthwise max pooling is currently "
+                                        "only implemented for CPU devices."));
+
+      pad_depth = 0;
+      out_depth = depth / depth_window;
+    }
+  }
+
+  // Transfers the right parameters for pooling to the op parameters
+  // Updates context->status if there is an invalid input.
+  void ExtractMklOpParams(OpKernelContext* context,
+                          TensorFormat data_format,
+                          const MklPoolParameters& params,
+                          MklPoolingOpParams* mkl_params) {
+    mkl_params->in_sizes[0] = params.tensor_in_cols;
+    mkl_params->in_sizes[1] = params.tensor_in_rows;
+    mkl_params->in_sizes[2] = params.depth;
+    mkl_params->in_sizes[3] = params.tensor_in_batch;
+
+    GetStridesFromSizes(data_format,
+                        mkl_params->in_strides,
+                        mkl_params->in_sizes);
+
+    mkl_params->out_sizes[0] = params.out_width;
+    mkl_params->out_sizes[1] = params.out_height;
+    mkl_params->out_sizes[2] = params.depth;
+    mkl_params->out_sizes[3] = params.tensor_in_batch;
+
+    GetStridesFromSizes(data_format,
+                        mkl_params->out_strides,
+                        mkl_params->out_sizes);
+
+    mkl_params->in_offset[0] = -params.pad_left;
+    mkl_params->in_offset[1] = -params.pad_top;
+    mkl_params->in_offset[2] = -params.pad_right;
+    mkl_params->in_offset[3] = -params.pad_bottom;
+
+    mkl_params->kernel_stride[0] = params.col_stride;
+    mkl_params->kernel_stride[1] = params.row_stride;
+
+    mkl_params->kernel_size[0] = params.window_cols;
+    mkl_params->kernel_size[1] = params.window_rows;
+  }
+}  // namespace tensorflow
+#endif  // INTEL_MKL