Diffstat (limited to 'tensorflow/core/kernels/mkl_maxpooling_op.cc')
-rw-r--r-- | tensorflow/core/kernels/mkl_maxpooling_op.cc | 506
1 files changed, 506 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
new file mode 100644
index 0000000000..9d6cfb0c97
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -0,0 +1,506 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/nn_ops.cc.
+#ifdef INTEL_MKL
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/mkl_util.h"
+#include "tensorflow/core/util/padding.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+// An implementation of MaxPooling (forward).
+template <typename Device, typename T>
+class MklMaxPoolingOp : public OpKernel {
+ public:
+  explicit MklMaxPoolingOp(OpKernelConstruction* context) : OpKernel(context) {
+    string data_format;
+
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
+    OP_REQUIRES(context, ksize_.size() == 4,
+                errors::InvalidArgument("Sliding window ksize field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+    OP_REQUIRES(context, stride_.size() == 4,
+                errors::InvalidArgument("Sliding window stride field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
+                errors::Unimplemented("Pooling is not yet supported on the "
+                                      "batch dimension."));
+
+    workspace_enabled_ = false;
+    // We may not get this attribute for this node if it does not go through
+    // graph rewrite pass. So we do not check for error while retrieving this
+    // attribute value.
+    context->GetAttr("workspace_enabled", &workspace_enabled_);
+  }
+
+  void Compute(OpKernelContext* context) override {
+    MklMaxPoolingOpContext mkl_context;
+    // Get the input tensor
+    const Tensor& tensor_in = MklGetInput(context, 0);
+    GetMklShape(context, 0, &mkl_context.input_shape);
+    bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
+    mkl_context.params.in_dim = 4;
+    MklPoolParameters pool_params;
+    if (input_in_mkl_format == false) {
+      pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+                       tensor_in.shape());
+      OP_REQUIRES(
+          context, (pool_params.depth_window == 1),
+          errors::Unimplemented("Depthwise max pooling not supported by MKL"));
+
+    } else {
+      pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+                       &mkl_context.input_shape);
+    }
+
+    // Extract the parameters for the op from the pooling specs
+
+    ExtractMklOpParams(context, data_format_, pool_params, &mkl_context.params);
+
+    mkl_context.MklCreateLayoutsAndPrimitives(context);
+
+    // Declare output tensor
+    TensorShape tensor_out_shape;
+    MklShape mkl_out_shape;
+    mkl_out_shape.SetMklTensor(true);
+    mkl_out_shape.SetMklLayout(mkl_context.prim_pooling_fwd, dnnResourceDst);
+    mkl_out_shape.SetTfLayout(mkl_context.params.in_dim,
+                              mkl_context.params.out_sizes,
+                              mkl_context.params.out_strides);
+    mkl_out_shape.SetTfDimOrder(mkl_context.params.in_dim, data_format_);
+
+    Tensor* output_tensor = nullptr;
+    tensor_out_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
+                                mkl_out_shape.GetMklLayout())) /
+                            sizeof(T));
+    AllocateOutputSetMklshape(context, 0, &output_tensor, tensor_out_shape,
+                              mkl_out_shape);
+
+    if (!workspace_enabled_) {
+      mkl_out_shape.SetMklTensor(false);
+    }
+
+    Tensor* workspace_tensor;
+    void* workspace_buf = nullptr;
+    if (workspace_enabled_) {
+      TensorShape workspace_shape;
+      workspace_shape.AddDim(
+          dnnLayoutGetMemorySize_F32(
+              static_cast<dnnLayout_t>(mkl_context.lt_workspace)) /
+          sizeof(T));
+      AllocateOutputSetMklshape(context, 1, &workspace_tensor, workspace_shape,
+                                mkl_out_shape);
+      mkl_context.pooling_res[dnnResourceWorkspace] = const_cast<void*>(
+          static_cast<const void*>(workspace_tensor->flat<T>().data()));
+    } else {
+      AllocTmpBuffer(context, workspace_tensor, mkl_context.lt_workspace,
+                     &workspace_buf);
+      mkl_context.pooling_res[dnnResourceWorkspace] = workspace_buf;
+    }
+
+    mkl_context.pooling_res[dnnResourceSrc] =
+        const_cast<void*>(static_cast<const void*>(tensor_in.flat<T>().data()));
+    mkl_context.pooling_res[dnnResourceDst] = const_cast<void*>(
+        static_cast<const void*>(output_tensor->flat<T>().data()));
+
+    CHECK_EQ(
+        dnnExecute_F32(mkl_context.prim_pooling_fwd, mkl_context.pooling_res),
+        E_SUCCESS);
+
+    mkl_context.MklCleanup();
+  }
+
+ private:
+  typedef struct {
+    MklPoolingOpParams params;
+    MklShape input_shape;
+    void* pooling_res[dnnResourceNumber];
+    dnnPrimitive_t prim_pooling_fwd;
+    dnnLayout_t lt_user_input, lt_workspace;
+
+    void MklCreateLayoutsAndPrimitives(OpKernelContext* context) {
+      bool input_in_mkl_format = input_shape.IsMklTensor();
+      // Create or use existing DNN user layout
+      if (input_in_mkl_format == false) {
+        CHECK_EQ(dnnLayoutCreate_F32(&lt_user_input, params.in_dim,
+                                     params.in_sizes, params.in_strides),
+                 E_SUCCESS);
+      } else {
+        lt_user_input = (dnnLayout_t)input_shape.GetCurLayout();
+      }
+
+      dnnAlgorithm_t algorithm = dnnAlgorithmPoolingMax;
+      dnnPrimitiveAttributes_t primAttr = nullptr;
+
+      // Create DNN primitives
+      CHECK_EQ(dnnPoolingCreateForward_F32(
+                   &prim_pooling_fwd, primAttr, algorithm, lt_user_input,
+                   params.kernel_size, params.kernel_stride, params.in_offset,
+                   dnnBorderZerosAsymm),
+               E_SUCCESS);
+
+      // Creates layout for the workspace
+      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_workspace, prim_pooling_fwd,
+                                                dnnResourceWorkspace),
+               E_SUCCESS);
+    }
+
+    void MklCleanup() {
+      bool input_in_mkl_format = input_shape.IsMklTensor();
+      CHECK_EQ(dnnDelete_F32(prim_pooling_fwd), E_SUCCESS);
+      if (!input_in_mkl_format) {
+        CHECK_EQ(dnnLayoutDelete_F32(lt_user_input), E_SUCCESS);
+      }
+      CHECK_EQ(dnnLayoutDelete_F32(lt_workspace), E_SUCCESS);
+    }
+  } MklMaxPoolingOpContext;
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_;
+  bool workspace_enabled_;
+};
+
+// The operation to compute MaxPool gradients.
+// It takes three inputs:
+//   - The original input tensor
+//   - The original output tensor
+//   - Backprop tensor for output
+// It produces one output: backprop tensor for input.
+template <class Device, class T>
+class MklMaxPoolingGradOp : public OpKernel {
+ public:
+  explicit MklMaxPoolingGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string data_format;
+
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
+    OP_REQUIRES(context, ksize_.size() == 4,
+                errors::InvalidArgument("Sliding window ksize field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+    OP_REQUIRES(context, stride_.size() == 4,
+                errors::InvalidArgument("Sliding window strides field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
+                errors::Unimplemented(
+                    "Pooling is not yet supported on the batch dimension."));
+    workspace_enabled_ = false;
+    // We may not get this attribute for this node if it does not go through
+    // graph rewrite pass. So we do not check for error while retrieving this
+    // attribute value.
+    context->GetAttr("workspace_enabled", &workspace_enabled_);
+  }
+
+  void Compute(OpKernelContext* context) override {
+    MklMaxPoolingGradOpContext mkl_context;
+    // Input - The original input tensor
+    const Tensor& tensor_in = MklGetInput(context, 0);
+
+    // Output - Backprop tensor for input.
+    Tensor* output_tensor = nullptr;
+
+    GetMklShape(context, 0, &mkl_context.input_shape);
+    GetMklShape(context, 2, &mkl_context.output_backprop_shape);
+    bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
+    if (input_in_mkl_format == false)
+      mkl_context.params.in_dim = tensor_in.dims();
+    else
+      mkl_context.params.in_dim = mkl_context.input_shape.GetDimension();
+
+    MklPoolParameters pool_params;
+    if (input_in_mkl_format == false) {
+      pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+                       tensor_in.shape());
+      OP_REQUIRES(
+          context, (pool_params.depth_window == 1),
+          errors::Unimplemented("Depthwise max pooling not supported by MKL"));
+
+    } else {
+      pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+                       &mkl_context.input_shape);
+    }
+
+    // Extract the parameters for the op from the pooling specs
+    ExtractMklOpParams(context, data_format_, pool_params, &mkl_context.params);
+
+    mkl_context.MklCreateLayouts(context);
+    mkl_context.MklCreatePrimitives(context, workspace_enabled_);
+    mkl_context.MklPrepareInputs(context, workspace_enabled_);
+
+    // Create shape for the input back prop output
+    TensorShape mkl_input_backprop;
+    MklShape mkl_output_shape;
+    mkl_output_shape.SetMklTensor(true);
+    mkl_output_shape.SetMklLayout(mkl_context.prim_pooling_bwd,
+                                  dnnResourceDiffSrc);
+    mkl_output_shape.SetTfLayout(mkl_context.params.in_dim,
+                                 mkl_context.params.in_sizes,
+                                 mkl_context.params.in_strides);
+    mkl_output_shape.SetTfDimOrder(mkl_context.params.in_dim, data_format_);
+
+    mkl_input_backprop.AddDim(
+        dnnLayoutGetMemorySize_F32(
+            static_cast<dnnLayout_t>(mkl_output_shape.GetMklLayout())) /
+        sizeof(T));
+    AllocateOutputSetMklshape(context, 0, &output_tensor, mkl_input_backprop,
+                              mkl_output_shape);
+    mkl_context.pooling_res[dnnResourceDiffSrc] = const_cast<void*>(
+        static_cast<const void*>(output_tensor->flat<T>().data()));
+
+    int64 output_size = output_tensor->NumElements();
+    for (int64 i = 0; i < output_size; ++i) {
+      (static_cast<float*>(mkl_context.pooling_res[dnnResourceDiffSrc]))[i] = 0;
+    }
+
+    CHECK_EQ(
+        dnnExecute_F32(mkl_context.prim_pooling_bwd, mkl_context.pooling_res),
+        E_SUCCESS);
+
+    mkl_context.MklCleanup(workspace_enabled_);
+  }
+
+ private:
+  typedef struct {
+    MklPoolingOpParams params;
+    MklShape input_shape, output_backprop_shape;
+    void* pooling_resfwd[dnnResourceNumber];
+    void* pooling_res[dnnResourceNumber];
+    dnnPrimitive_t prim_pooling_fwd, prim_pooling_bwd, convert_input,
+        convert_outbackprop;
+    dnnLayout_t lt_outbackprop_user, lt_outbackprop_prim, lt_input_user,
+        lt_input_prim;
+    void* input_buf;
+    void* outbackprop_buf;
+
+    void MklCreateLayouts(OpKernelContext* context) {
+      bool input_in_mkl_format = input_shape.IsMklTensor();
+      bool outbackprop_in_mkl_format = output_backprop_shape.IsMklTensor();
+      // Create DNN user layout for input and outbackprop or get existing layout
+      if (input_in_mkl_format == false) {
+        CHECK_EQ(dnnLayoutCreate_F32(&lt_input_user, params.in_dim,
+                                     params.in_sizes, params.in_strides),
+                 E_SUCCESS);
+      } else {
+        lt_input_user = (dnnLayout_t)input_shape.GetCurLayout();
+      }
+
+      // We dont care about the output layout for now as we can create it from
+      // primitives for the max pooling fwd prop
+      if (outbackprop_in_mkl_format == false) {
+        CHECK_EQ(dnnLayoutCreate_F32(&lt_outbackprop_user, params.in_dim,
+                                     params.out_sizes, params.out_strides),
+                 E_SUCCESS);
+      } else {
+        lt_outbackprop_user = (dnnLayout_t)output_backprop_shape.GetCurLayout();
+      }
+    }
+
+    // Create DNN primitives
+    void MklCreatePrimitives(OpKernelContext* context, bool workspace_enabled) {
+      dnnAlgorithm_t algorithm = dnnAlgorithmPoolingMax;
+      dnnPrimitiveAttributes_t primAttr = nullptr;
+
+      if (workspace_enabled == false) {
+        CHECK_EQ(dnnPoolingCreateForward_F32(
+                     &prim_pooling_fwd, primAttr, algorithm, lt_input_user,
+                     params.kernel_size, params.kernel_stride, params.in_offset,
+                     dnnBorderZerosAsymm),
+                 E_SUCCESS);
+      }
+
+      CHECK_EQ(dnnPoolingCreateBackward_F32(
+                   &prim_pooling_bwd, primAttr, algorithm, lt_input_user,
+                   params.kernel_size, params.kernel_stride, params.in_offset,
+                   dnnBorderZerosAsymm),
+               E_SUCCESS);
+
+      // Creates conversions
+      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+                   &lt_outbackprop_prim, prim_pooling_bwd, dnnResourceDiffDst),
+               E_SUCCESS);
+
+      // Tensors needed to create temporary buffers
+      Tensor input_buf_tensor, outbackprop_buf_tensor;
+
+      if (workspace_enabled == false) {
+        CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+                     &lt_input_prim, prim_pooling_fwd, dnnResourceSrc),
+                 E_SUCCESS);
+        if (!dnnLayoutCompare_F32(lt_input_user, lt_input_prim)) {
+          CHECK_EQ(dnnConversionCreate_F32(&convert_input, lt_input_user,
+                                           lt_input_prim),
+                   E_SUCCESS);
+          AllocTmpBuffer(context, &input_buf_tensor, lt_input_prim, &input_buf);
+        }
+      }
+
+      if (!dnnLayoutCompare_F32(lt_outbackprop_user, lt_outbackprop_prim)) {
+        CHECK_EQ(
+            dnnConversionCreate_F32(&convert_outbackprop, lt_outbackprop_user,
+                                    lt_outbackprop_prim),
+            E_SUCCESS);
+        AllocTmpBuffer(context, &outbackprop_buf_tensor, lt_outbackprop_prim,
+                       &outbackprop_buf);
+      }
+    }
+
+    // Compare incoming tensor layouts with MKL preferred layouts and convert
+    // data to the preferred layout if necessary
+    void MklPrepareInputs(OpKernelContext* context, bool workspace_enabled) {
+      const Tensor& tensor_in = MklGetInput(context, 0);
+      const Tensor& out_backprop = MklGetInput(context, 2);
+      bool input_in_mkl_format = input_shape.IsMklTensor();
+      bool outbackprop_in_mkl_format = output_backprop_shape.IsMklTensor();
+
+      void* tmp_output_buf;
+      Tensor tmp_output_buf_tensor;
+
+      void* workspace_buf;
+      Tensor workspace_buf_tensor;
+
+      if (workspace_enabled == false) {
+        if (convert_input != nullptr) {
+          if (input_in_mkl_format == false) {
+            CHECK_EQ(dnnConversionExecute_F32(
+                         convert_input,
+                         const_cast<void*>(static_cast<const void*>(
+                             tensor_in.flat<T>().data())),
+                         input_buf),
+                     E_SUCCESS);
+            CHECK_EQ(dnnDelete_F32(convert_input), E_SUCCESS);
+            convert_input = nullptr;
+          } else {
+            input_shape.GetConvertedFlatData(
+                lt_input_prim,
+                const_cast<void*>(
+                    static_cast<const void*>(tensor_in.flat<T>().data())),
+                input_buf);
+          }
+          pooling_resfwd[dnnResourceSrc] = input_buf;
+        } else {
+          pooling_resfwd[dnnResourceSrc] = const_cast<void*>(
+              static_cast<const void*>(tensor_in.flat<T>().data()));
+        }
+
+        dnnLayout_t lt_workspace;
+        CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+                     &lt_workspace, prim_pooling_fwd, dnnResourceWorkspace),
+                 E_SUCCESS);
+        AllocTmpBuffer(context, &workspace_buf_tensor, lt_workspace,
+                       &workspace_buf);
+        pooling_resfwd[dnnResourceWorkspace] = workspace_buf;
+
+        dnnLayoutDelete_F32(lt_workspace);
+
+        // We create the layout for max pooling fwd prop tmp output here
+        AllocTmpBuffer(context, &tmp_output_buf_tensor, lt_outbackprop_prim,
+                       &tmp_output_buf);
+        pooling_resfwd[dnnResourceDst] = tmp_output_buf;
+
+        CHECK_EQ(dnnExecute_F32(prim_pooling_fwd, pooling_resfwd), E_SUCCESS);
+        pooling_res[dnnResourceWorkspace] =
+            pooling_resfwd[dnnResourceWorkspace];
+      } else {
+        const Tensor& workspace = MklGetInput(context, 3);
+        pooling_res[dnnResourceWorkspace] = const_cast<void*>(
+            static_cast<const void*>(workspace.flat<T>().data()));
+      }
+
+      // Out backprop conversions if needed
+      if (convert_outbackprop != nullptr) {
+        if (outbackprop_in_mkl_format == false) {
+          CHECK_EQ(dnnConversionExecute_F32(
+                       convert_outbackprop,
+                       const_cast<void*>(static_cast<const void*>(
+                           out_backprop.flat<T>().data())),
+                       outbackprop_buf),
+                   E_SUCCESS);
+          CHECK_EQ(dnnDelete_F32(convert_outbackprop), E_SUCCESS);
+        } else {
+          output_backprop_shape.GetConvertedFlatData(
+              lt_outbackprop_prim,
+              const_cast<void*>(
+                  static_cast<const void*>(out_backprop.flat<T>().data())),
+              outbackprop_buf);
+        }
+        pooling_res[dnnResourceDiffDst] = outbackprop_buf;
+      } else {
+        pooling_res[dnnResourceDiffDst] = const_cast<void*>(
+            static_cast<const void*>(out_backprop.flat<T>().data()));
+      }
+    }
+
+    void MklCleanup(bool workspace_enabled) {
+      bool input_in_mkl_format = input_shape.IsMklTensor();
+      bool outbackprop_in_mkl_format = output_backprop_shape.IsMklTensor();
+      if (workspace_enabled == false) {
+        CHECK_EQ(dnnDelete_F32(prim_pooling_fwd), E_SUCCESS);
+      }
+      CHECK_EQ(dnnDelete_F32(prim_pooling_bwd), E_SUCCESS);
+      if (outbackprop_in_mkl_format == false) {
+        CHECK_EQ(dnnLayoutDelete_F32(lt_outbackprop_user), E_SUCCESS);
+      }
+      CHECK_EQ(dnnLayoutDelete_F32(lt_outbackprop_prim), E_SUCCESS);
+      if (input_in_mkl_format == false) {
+        CHECK_EQ(dnnLayoutDelete_F32(lt_input_user), E_SUCCESS);
+      }
+      if (workspace_enabled == false) {
+        CHECK_EQ(dnnLayoutDelete_F32(lt_input_prim), E_SUCCESS);
+      }
+    }
+  } MklMaxPoolingGradOpContext;
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_;
+
+  bool workspace_enabled_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("MklMaxPool")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<float>("T")
+                            .Label(mkl_layer_registry::kMklLayerLabel),
+                        MklMaxPoolingOp<CPUDevice, float>);
+
+REGISTER_KERNEL_BUILDER(Name("MklMaxPoolGrad")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<float>("T")
+                            .Label(mkl_layer_registry::kMklLayerLabel),
+                        MklMaxPoolingGradOp<CPUDevice, float>);
+
+}  // namespace tensorflow
+#endif  // INTEL_MKL
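
For context (not part of the commit): the .Label(mkl_layer_registry::kMklLayerLabel) in the two registrations above restricts these kernels to MklMaxPool/MklMaxPoolGrad nodes produced by the MKL graph-rewrite pass. The output spatial sizes that MklPoolParameters::Init derives from the ksize, strides, and padding attributes follow TensorFlow's usual VALID/SAME pooling arithmetic; the snippet below is a minimal, self-contained sketch of that arithmetic, where PoolOutputSize and its arguments are illustrative names rather than identifiers from this file.

    // Sketch of the windowed-output-size arithmetic used for pooling.
    // VALID: ceil((in - window + 1) / stride); SAME: ceil(in / stride).
    #include <cstdint>
    #include <iostream>

    static int64_t PoolOutputSize(int64_t in, int64_t window, int64_t stride,
                                  bool same_padding) {
      if (same_padding) {
        return (in + stride - 1) / stride;     // ceil(in / stride)
      }
      return (in - window + stride) / stride;  // ceil((in - window + 1) / stride)
    }

    int main() {
      // Example: a 112x112 input pooled with a 3x3 window and stride 2.
      std::cout << PoolOutputSize(112, 3, 2, /*same_padding=*/true) << "\n";   // 56
      std::cout << PoolOutputSize(112, 3, 2, /*same_padding=*/false) << "\n";  // 55
      return 0;
    }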