From fe97705b706c9dcd36586b6158e30758346c6afd Mon Sep 17 00:00:00 2001 From: Vivek Rane Date: Thu, 23 Mar 2017 13:13:49 -0700 Subject: MKL support for max/avg pooling and relu (#8296) * Adding MKL support for Max/Avg Pooling and ReLU * Missed the mkl layer registry files * Fixed sanity check errors with buildifier * Adding MKL support for Max/Avg Pooling and ReLU * Missed the mkl layer registry files * Fixed sanity check errors with buildifier * Adding Intel Conv2D kernel implementation along with required Graph passes This commit contains 4 main components: 1) Intel-optimized kernel implementation for Conv2D op Implementation in kernels/mkl_conv_ops.* 2) Graph passes required to enable Conv2D optimized implementation Implementation in graph/mkl_*. We also need a new op, MklToTf op. Its implementation is in kernels/mkl_tfconv_op.cc. 3) Utility functions used in kernel implementation Implementation is in common_runtime/mkl_layer_registry* and util/mkl_util.h 4) BUILD changes for Conv2D, graph passes and utility functions * Refactor MKL convolution forward pass computation into smaller functions. Changed configure to point to newer MKLML library * Moved Mkl helper data structures and routines to private class members * MKL op registration changed to use existing op registry (nhasabni) * Fixed buildifier error * Adding MKL support for Max/Avg Pooling and ReLU * Missed the mkl layer registry files * Fixed sanity check errors with buildifier * Removed the mkl layer registry (should not have been added) and made fixes according to the code review comments * Adding Intel Conv2D kernel implementation along with required Graph passes This commit contains 4 main components: 1) Intel-optimized kernel implementation for Conv2D op Implementation in kernels/mkl_conv_ops.* 2) Graph passes required to enable Conv2D optimized implementation Implementation in graph/mkl_*. We also need a new op, MklToTf op. Its implementation is in kernels/mkl_tfconv_op.cc. 
3) Utility functions used in kernel implementation Implementation is in common_runtime/mkl_layer_registry* and util/mkl_util.h 4) BUILD changes for Conv2D, graph passes and utility functions * Refactor MKL convolution forward pass computation into smaller functions. Changed configure to point to newer MKLML library * Moved Mkl helper data structures and routines to private class members * MKL op registration changed to use existing op registry (nhasabni) * Fixed buildifier error * Adding MKL support for Max/Avg Pooling and ReLU * Missed the mkl layer registry files * Fixed sanity check errors with buildifier * Removed the mkl layer registry (should not have been added) and made fixes according to the code review comments * Fixed rebase mess-ups * Added documentation for mkl pooling op parameters * removed layer registry reference from mkl relu op --- tensorflow/core/kernels/mkl_pooling_ops_common.cc | 166 ++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 tensorflow/core/kernels/mkl_pooling_ops_common.cc (limited to 'tensorflow/core/kernels/mkl_pooling_ops_common.cc') diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc new file mode 100644 index 0000000000..3eb472d7e3 --- /dev/null +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -0,0 +1,166 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifdef INTEL_MKL +#include +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/kernels/mkl_pooling_ops_common.h" +#include "tensorflow/core/common_runtime/device.h" + +namespace tensorflow { + + // Initialization for TensorFlow format + void MklPoolParameters::Init(OpKernelContext* context, + const std::vector& ksize, + const std::vector& stride, + Padding padding, + TensorFormat data_format, + const TensorShape& tensor_in_shape) { + // For maxpooling, tensor_in should have 4 dimensions. + OP_REQUIRES(context, tensor_in_shape.dims() == 4, + errors::InvalidArgument("tensor_in must be 4-dimensional")); + + depth = GetTensorDim(tensor_in_shape, data_format, 'C'); + tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, 'W'); + tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, 'H'); + tensor_in_batch = GetTensorDim(tensor_in_shape, data_format, 'N'); + + Init(context, ksize, stride, padding, data_format); + } + + // Initialization for MKL format + void MklPoolParameters::Init(OpKernelContext* context, + const std::vector& ksize, + const std::vector& stride, + Padding padding, + TensorFormat data_format, + const MklShape* mklInputShape) { + // Get the input sizes + depth = mklInputShape->GetSizes()[2]; + tensor_in_cols = mklInputShape->GetSizes()[0]; + tensor_in_rows = mklInputShape->GetSizes()[1]; + tensor_in_batch = mklInputShape->GetSizes()[3]; + + Init(context, ksize, stride, padding, data_format); + } + + // Common Initialization for TensorFlow and MKL formats + void MklPoolParameters::Init(OpKernelContext* context, + const std::vector& ksize, + const std::vector& stride, + Padding padding, + TensorFormat data_format) { + // Get the data format + this->data_format = data_format; + + // Get the output sizes + window_rows = GetTensorDim(ksize, data_format, 'H'); + window_cols = GetTensorDim(ksize, data_format, 'W'); + depth_window = 
GetTensorDim(ksize, data_format, 'C'); + + // Get the strides + row_stride = GetTensorDim(stride, data_format, 'H'); + col_stride = GetTensorDim(stride, data_format, 'W'); + depth_stride = GetTensorDim(stride, data_format, 'C'); + + // We only support 2D pooling across width/height and depthwise + // pooling, not a combination. + OP_REQUIRES(context, + (depth_window == 1 || (window_rows == 1 && window_cols == 1)), + errors::Unimplemented( + "MaxPooling supports exactly one of pooling across depth " + "or pooling across width/height.")); + + if (depth_window == 1) { + OP_REQUIRES_OK(context, + GetWindowedOutputSizeVerbose(tensor_in_rows, + window_rows, + row_stride, + padding, + &out_height, + &pad_top, + &pad_bottom)); + + OP_REQUIRES_OK(context, + GetWindowedOutputSizeVerbose(tensor_in_cols, + window_cols, + col_stride, + padding, + &out_width, + &pad_left, + &pad_right)); + } else { + // Our current version of depthwise max pooling does not support + // any padding, and expects the depth_window to equal the depth + // stride (no overlapping). + OP_REQUIRES(context, depth % depth_window == 0, + errors::Unimplemented("Depthwise max pooling requires the" + " depth window to evenly divide the" + " input depth")); + OP_REQUIRES(context, depth_stride == depth_window, + errors::Unimplemented("Depthwise max pooling requires the" + " depth window to equal the depth" + " stride")); + + // The current version of depthwise max is only implemented on CPU. + OP_REQUIRES(context, + (DeviceType(static_cast(context->device()) + ->attributes() + .device_type()) == DeviceType(DEVICE_CPU)), + errors::Unimplemented("Depthwise max pooling is currently " + "only implemented for CPU devices.")); + + pad_depth = 0; + out_depth = depth / depth_window; + } + } + + // Transfers the right parameters for pooling to the op parameters + // Updates context->status if there is an invalid input. 
+ void ExtractMklOpParams(OpKernelContext* context, + TensorFormat data_format, + const MklPoolParameters ¶ms, + MklPoolingOpParams *mkl_params) { + mkl_params->in_sizes[0] = params.tensor_in_cols; + mkl_params->in_sizes[1] = params.tensor_in_rows; + mkl_params->in_sizes[2] = params.depth; + mkl_params->in_sizes[3] = params.tensor_in_batch; + + GetStridesFromSizes(data_format, + mkl_params->in_strides, + mkl_params->in_sizes); + + mkl_params->out_sizes[0] = params.out_width; + mkl_params->out_sizes[1] = params.out_height; + mkl_params->out_sizes[2] = params.depth; + mkl_params->out_sizes[3] = params.tensor_in_batch; + + GetStridesFromSizes(data_format, + mkl_params->out_strides, + mkl_params->out_sizes); + + mkl_params->in_offset[0] = -params.pad_left; + mkl_params->in_offset[1] = -params.pad_top; + mkl_params->in_offset[2] = -params.pad_right; + mkl_params->in_offset[3] = -params.pad_bottom; + + mkl_params->kernel_stride[0] = params.col_stride; + mkl_params->kernel_stride[1] = params.row_stride; + + mkl_params->kernel_size[0] = params.window_cols; + mkl_params->kernel_size[1] = params.window_rows; + } +} // namespace tensorflow +#endif // INTEL_MKL -- cgit v1.2.3