Diffstat (limited to 'tensorflow/core/kernels/mkl_avgpooling_op.cc')
-rw-r--r-- | tensorflow/core/kernels/mkl_avgpooling_op.cc | 428
1 files changed, 428 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
new file mode 100644
index 0000000000..71918fe269
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -0,0 +1,428 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+#include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template <typename Device, typename T>
+class MklAvgPoolingOp : public UnaryOp<T> {
+ public:
+  explicit MklAvgPoolingOp(OpKernelConstruction* context)
+      : UnaryOp<T>(context) {
+    string data_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+
+    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
+    OP_REQUIRES(context, ksize_.size() == 4,
+                errors::InvalidArgument("Sliding window ksize field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+    OP_REQUIRES(context, stride_.size() == 4,
+                errors::InvalidArgument("Sliding window stride field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
+                errors::Unimplemented("Pooling is not yet supported on the "
+                                      "batch dimension."));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    MklAvgPoolingOpContext mkl_context;
+    const Tensor& tensor_in = MklGetInput(context, 0);
+    GetMklShape(context, 0, &mkl_context.input_shape);
+    bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
+    if (!input_in_mkl_format)
+      mkl_context.params.in_dim = tensor_in.dims();
+    else
+      mkl_context.params.in_dim = mkl_context.input_shape.GetDimension();
+
+    MklPoolParameters pool_params;
+    if (!input_in_mkl_format) {
+      pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+                       tensor_in.shape());
+    } else {
+      pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+                       &mkl_context.input_shape);
+    }
+
+    // Extract the parameters for the op from the pooling specs
+    ExtractMklOpParams(context, data_format_, pool_params, &mkl_context.params);
+
+    Tensor mkl_tmp_input_buf_tensor_;
+    mkl_context.MklCreateLayoutsAndPrimitives(context,
+                                              &mkl_tmp_input_buf_tensor_);
+
+    Tensor workspace_tensor;
+    void* workspace_buf;
+    AllocTmpBuffer(context, &workspace_tensor, mkl_context.lt_workspace,
+                   &workspace_buf);
+
+    if (mkl_context.convert_input != nullptr) {
+      if (input_in_mkl_format == false) {
+        CHECK_EQ(
+            dnnConversionExecute_F32(
+                mkl_context.convert_input,
+                static_cast<void*>(const_cast<T*>(tensor_in.flat<T>().data())),
+                mkl_context.input_buf),
+            E_SUCCESS);
+        CHECK_EQ(dnnDelete_F32(mkl_context.convert_input), E_SUCCESS);
+      } else {
+        mkl_context.input_shape.GetConvertedFlatData(
+            mkl_context.lt_prim_input,
+            static_cast<void*>(const_cast<T*>(tensor_in.flat<T>().data())),
+            mkl_context.input_buf);
+      }
+      mkl_context.pooling_res[dnnResourceSrc] = mkl_context.input_buf;
+    } else {
+      mkl_context.pooling_res[dnnResourceSrc] =
+          static_cast<void*>(const_cast<T*>(tensor_in.flat<T>().data()));
+    }
+
+    // Declare output tensor and allocate memory
+    Tensor* output = nullptr;
+    TensorShape tensor_out_shape;
+    MklShape mkl_out_shape;
+    mkl_out_shape.SetMklTensor(true);
+    mkl_out_shape.SetMklLayout(mkl_context.prim_pooling_fwd, dnnResourceDst);
+    mkl_out_shape.SetTfLayout(mkl_context.params.in_dim,
+                              mkl_context.params.out_sizes,
+                              mkl_context.params.out_strides);
+    mkl_out_shape.SetTfDimOrder(mkl_context.params.in_dim, data_format_);
+
+    tensor_out_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
+                                mkl_out_shape.GetMklLayout())) /
+                            sizeof(T));
+
+    AllocateOutputSetMklshape(context, 0, &output, tensor_out_shape,
+                              mkl_out_shape);
+    mkl_context.pooling_res[dnnResourceDst] =
+        static_cast<void*>(output->flat<T>().data());
+
+    mkl_context.pooling_res[dnnResourceWorkspace] = workspace_buf;
+
+    CHECK_EQ(
+        dnnExecute_F32(mkl_context.prim_pooling_fwd, mkl_context.pooling_res),
+        E_SUCCESS);
+
+    mkl_context.MklCleanup();
+  }
+
+ private:
+  typedef struct {
+    MklPoolingOpParams params;
+    MklShape input_shape;
+    dnnPrimitive_t prim_pooling_fwd, convert_input;
+    dnnLayout_t lt_user_input, lt_prim_input, lt_workspace;
+    void* input_buf;
+    void* pooling_res[dnnResourceNumber];
+
+    void MklCreateLayoutsAndPrimitives(OpKernelContext* context,
+                                       Tensor* mkl_tmp_input_buf_tensor) {
+      bool input_in_mkl_format = input_shape.IsMklTensor();
+
+      if (!input_in_mkl_format) {
+        CHECK_EQ(dnnLayoutCreate_F32(&lt_user_input, params.in_dim,
+                                     params.in_sizes, params.in_strides),
+                 E_SUCCESS);
+      } else {
+        lt_user_input = (dnnLayout_t)input_shape.GetCurLayout();
+      }
+
+      dnnAlgorithm_t algorithm = dnnAlgorithmPoolingAvg;
+      dnnPrimitiveAttributes_t primAttr = nullptr;
+
+      // Create DNN primitives
+      CHECK_EQ(dnnPoolingCreateForward_F32(
+                   &prim_pooling_fwd, primAttr, algorithm, lt_user_input,
+                   params.kernel_size, params.kernel_stride, params.in_offset,
+                   dnnBorderZerosAsymm),
+               E_SUCCESS);
+
+      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+                   &lt_prim_input, prim_pooling_fwd, dnnResourceSrc),
+               E_SUCCESS);
+      if (!dnnLayoutCompare_F32(lt_user_input, lt_prim_input)) {
+        CHECK_EQ(dnnConversionCreate_F32(&convert_input, lt_user_input,
+                                         lt_prim_input),
+                 E_SUCCESS);
+
+        AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_prim_input,
+                       &input_buf);
+      }
+
+      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_workspace, prim_pooling_fwd,
+                                                dnnResourceWorkspace),
+               E_SUCCESS);
+    }
+
+    void MklCleanup() {
+      bool input_in_mkl_format = input_shape.IsMklTensor();
+      if (!input_in_mkl_format) {
+        CHECK_EQ(dnnLayoutDelete_F32(lt_user_input), E_SUCCESS);
+      }
+
+      CHECK_EQ(dnnDelete_F32(prim_pooling_fwd), E_SUCCESS);
+      CHECK_EQ(dnnLayoutDelete_F32(lt_prim_input), E_SUCCESS);
+    }
+  } MklAvgPoolingOpContext;
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_;
+};
+
+//-----------------------------------------------------------------------------
+
+template <class Device, class T>
+class MklAvgPoolingGradOp : public OpKernel {
+ public:
+  explicit MklAvgPoolingGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string data_format;
+
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
+    OP_REQUIRES(context, ksize_.size() == 4,
+                errors::InvalidArgument("Sliding window ksize field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+    OP_REQUIRES(context, stride_.size() == 4,
+                errors::InvalidArgument("Sliding window strides field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
+                errors::Unimplemented("Pooling is not yet supported on the "
+                                      "batch dimension."));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    MklAvgPoolingGradOpContext mkl_context;
+    const Tensor& tensor_in_shape = MklGetInput(context, 0);
+    const Tensor& out_backprop = MklGetInput(context, 1);
+    GetMklShape(context, 1, &mkl_context.out_backprop_shape);
+    bool outbackprop_in_mkl_format =
+        mkl_context.out_backprop_shape.IsMklTensor();
+
+    TensorShape output_shape;
+    auto shape_vec = tensor_in_shape.vec<int32>();
+    for (int64 i = 0; i < tensor_in_shape.NumElements(); ++i) {
+      output_shape.AddDim(shape_vec(i));
+    }
+
+    MklPoolParameters pool_params;
+    pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+                     output_shape);
+
+    // Extract the parameters for the op from the pooling specs
+    ExtractMklOpParams(context, data_format_, pool_params, &mkl_context.params);
+
+    // Tensors needed to create temporary buffers
+    Tensor outbackprop_buf_tensor;
+    void* outbackprop_buf;
+    mkl_context.MklCreateLayoutsAndPrimitives(context);
+
+    // Check if outbackprop layout requires conversion.
+    if (!dnnLayoutCompare_F32(mkl_context.lt_user_outbackprop,
+                              mkl_context.lt_prim_outbackprop)) {
+      CHECK_EQ(dnnConversionCreate_F32(&mkl_context.convert_outbackprop,
+                                       mkl_context.lt_user_outbackprop,
+                                       mkl_context.lt_prim_outbackprop),
+               E_SUCCESS);
+
+      AllocTmpBuffer(context, &outbackprop_buf_tensor,
+                     mkl_context.lt_prim_outbackprop, &outbackprop_buf);
+
+      if (!outbackprop_in_mkl_format) {
+        CHECK_EQ(dnnConversionExecute_F32(mkl_context.convert_outbackprop,
+                                          static_cast<void*>(const_cast<T*>(
+                                              out_backprop.flat<T>().data())),
+                                          outbackprop_buf),
+                 E_SUCCESS);
+        CHECK_EQ(dnnDelete_F32(mkl_context.convert_outbackprop), E_SUCCESS);
+      } else {
+        mkl_context.out_backprop_shape.GetConvertedFlatData(
+            mkl_context.lt_prim_outbackprop,
+            static_cast<void*>(const_cast<T*>(out_backprop.flat<T>().data())),
+            outbackprop_buf);
+      }
+      mkl_context.pooling_res[dnnResourceDiffDst] = outbackprop_buf;
+    } else {
+      mkl_context.pooling_res[dnnResourceDiffDst] =
+          static_cast<void*>(const_cast<T*>(out_backprop.flat<T>().data()));
+    }
+
+    // Handle workspace requirements.
+    Tensor workspace_buf_tensor;
+    void* workspace_buf;
+    AllocTmpBuffer(context, &workspace_buf_tensor, mkl_context.lt_workspace,
+                   &workspace_buf);
+    mkl_context.pooling_res[dnnResourceWorkspace] = workspace_buf;
+
+    // Handle MKL output tensor setup.
+    Tensor* output = nullptr;
+    TensorShape tensor_out_shape;
+    MklShape mkl_out_shape;
+    mkl_out_shape.SetMklTensor(true);
+    mkl_out_shape.SetMklLayout(mkl_context.prim_pooling_bwd,
+                               dnnResourceDiffSrc);
+    mkl_out_shape.SetTfLayout(mkl_context.params.in_dim,
+                              mkl_context.params.in_sizes,
+                              mkl_context.params.in_strides);
+    mkl_out_shape.SetTfDimOrder(mkl_context.params.in_dim, data_format_);
+
+    tensor_out_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
+                                mkl_out_shape.GetMklLayout())) /
+                            sizeof(T));
+
+    AllocateOutputSetMklshape(context, 0, &output, tensor_out_shape,
+                              mkl_out_shape);
+
+    // Set output tensor.
+    mkl_context.pooling_res[dnnResourceDiffSrc] =
+        static_cast<void*>(output->flat<T>().data());
+
+    // Execute primitive.
+    CHECK_EQ(
+        dnnExecute_F32(mkl_context.prim_pooling_bwd, mkl_context.pooling_res),
+        E_SUCCESS);
+
+    mkl_context.MklCleanup();
+  }
+
+ private:
+  typedef struct {
+    MklPoolingOpParams params;
+    MklShape out_backprop_shape;
+    dnnPrimitive_t prim_pooling_bwd, convert_outbackprop;
+    void* pooling_res[dnnResourceNumber];
+    dnnLayout_t lt_user_input, lt_user_outbackprop, lt_prim_outbackprop,
+        lt_workspace;
+
+    void MklCreateLayoutsAndPrimitives(OpKernelContext* context) {
+      const Tensor& tensor_in_shape = MklGetInput(context, 0);
+      const Tensor& out_backprop = MklGetInput(context, 1);
+      bool outbackprop_in_mkl_format = out_backprop_shape.IsMklTensor();
+
+      if (!outbackprop_in_mkl_format) {
+        // For avgpooling, tensor_in_shape should have 1 dimension, and 4
+        // elements.
+        OP_REQUIRES(
+            context,
+            tensor_in_shape.dims() == 1 && tensor_in_shape.NumElements() == 4,
+            errors::InvalidArgument("original input shape must be "
+                                    "1-dimensional and 4 elements"));
+
+        // For avgpooling, out_backprop should have 4 dimensions.
+        OP_REQUIRES(context, out_backprop.dims() == 4,
+                    errors::InvalidArgument("out_backprop must be "
+                                            "4-dimensional"));
+      } else {
+        // Input in MKL format.
+        OP_REQUIRES(
+            context, out_backprop.dims() == 2,
+            errors::InvalidArgument("out_backprop in MKL format must be "
+                                    "2-dimensional"));
+
+        // For avgpooling, out_backprop should have 4 dimensions.
+        OP_REQUIRES(context, out_backprop_shape.GetDimension() == 4,
+                    errors::InvalidArgument("out_backprop must be "
+                                            "4-dimensional"));
+      }
+
+      // TODO(inteltf): Get outbackprop layout.
+      // Do we need to create layout in every invocation?
+      if (!outbackprop_in_mkl_format) {
+        CHECK_EQ(dnnLayoutCreate_F32(&lt_user_outbackprop, params.in_dim,
+                                     params.out_sizes, params.out_strides),
+                 E_SUCCESS);
+      } else {
+        lt_user_outbackprop = (dnnLayout_t)out_backprop_shape.GetCurLayout();
+      }
+
+      // Create the backward primitive
+      // Create DNN user layout
+      CHECK_EQ(dnnLayoutCreate_F32(&lt_user_input, params.in_dim,
+                                   params.in_sizes, params.in_strides),
+               E_SUCCESS);
+
+      // Create PoolingBackward primitive
+      dnnAlgorithm_t algorithm = dnnAlgorithmPoolingAvg;
+      dnnPrimitiveAttributes_t primAttr = nullptr;
+      CHECK_EQ(dnnPoolingCreateBackward_F32(
+                   &prim_pooling_bwd, primAttr, algorithm, lt_user_input,
+                   params.kernel_size, params.kernel_stride, params.in_offset,
+                   dnnBorderZerosAsymm),
+               E_SUCCESS);
+
+      // Create expected outbackprop layout from the primitive.
+      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+                   &lt_prim_outbackprop, prim_pooling_bwd, dnnResourceDiffDst),
+               E_SUCCESS);
+
+      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_workspace, prim_pooling_bwd,
+                                                dnnResourceWorkspace),
+               E_SUCCESS);
+    }
+
+    void MklCleanup() {
+      bool outbackprop_in_mkl_format = out_backprop_shape.IsMklTensor();
+      CHECK_EQ(dnnDelete_F32(prim_pooling_bwd), E_SUCCESS);
+      CHECK_EQ(dnnLayoutDelete_F32(lt_user_input), E_SUCCESS);
+      if (!outbackprop_in_mkl_format) {
+        CHECK_EQ(dnnLayoutDelete_F32(lt_user_outbackprop), E_SUCCESS);
+      }
+      CHECK_EQ(dnnLayoutDelete_F32(lt_prim_outbackprop), E_SUCCESS);
+      CHECK_EQ(dnnLayoutDelete_F32(lt_workspace), E_SUCCESS);
+    }
+  } MklAvgPoolingGradOpContext;
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("MklAvgPool")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<float>("T")
+                            .Label(mkl_layer_registry::kMklLayerLabel),
+                        MklAvgPoolingOp<CPUDevice, float>);
+
+REGISTER_KERNEL_BUILDER(Name("MklAvgPoolGrad")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<float>("T")
+                            .Label(mkl_layer_registry::kMklLayerLabel),
+                        MklAvgPoolingGradOp<CPUDevice, float>);
+
+}  // namespace tensorflow
+#endif  // INTEL_MKL