Diffstat (limited to 'tensorflow/core/kernels/mkl_avgpooling_op.cc')
-rw-r--r--  tensorflow/core/kernels/mkl_avgpooling_op.cc  | 428 +
1 file changed, 428 insertions(+), 0 deletions(-)
diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
new file mode 100644
index 0000000000..71918fe269
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -0,0 +1,428 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ==============================================================================*/
+
+#ifdef INTEL_MKL
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+#include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template <typename Device, typename T>
+class MklAvgPoolingOp : public UnaryOp<T> {
+ public:
+ explicit MklAvgPoolingOp(OpKernelConstruction* context)
+ : UnaryOp<T>(context) {
+ string data_format;
+ OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+ OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+ errors::InvalidArgument("Invalid data format"));
+
+ OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
+ OP_REQUIRES(context, ksize_.size() == 4,
+ errors::InvalidArgument("Sliding window ksize field must "
+ "specify 4 dimensions"));
+ OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+ OP_REQUIRES(context, stride_.size() == 4,
+ errors::InvalidArgument("Sliding window stride field must "
+ "specify 4 dimensions"));
+ OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
+ errors::Unimplemented("Pooling is not yet supported on the "
+ "batch dimension."));
+ }
+
+ void Compute(OpKernelContext* context) override {
+ MklAvgPoolingOpContext mkl_context;
+ const Tensor& tensor_in = MklGetInput(context, 0);
+ GetMklShape(context, 0, &mkl_context.input_shape);
+ bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
+ if (!input_in_mkl_format)
+ mkl_context.params.in_dim = tensor_in.dims();
+ else
+ mkl_context.params.in_dim = mkl_context.input_shape.GetDimension();
+
+ MklPoolParameters pool_params;
+ if (!input_in_mkl_format) {
+ pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+ tensor_in.shape());
+ } else {
+ pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+ &mkl_context.input_shape);
+ }
+
+ // Extract the parameters for the op from the pooling specs
+ ExtractMklOpParams(context, data_format_, pool_params, &mkl_context.params);
+
+ Tensor mkl_tmp_input_buf_tensor_;
+ mkl_context.MklCreateLayoutsAndPrimitives(context,
+ &mkl_tmp_input_buf_tensor_);
+
+ Tensor workspace_tensor;
+ void* workspace_buf;
+ AllocTmpBuffer(context, &workspace_tensor, mkl_context.lt_workspace,
+ &workspace_buf);
+
+ if (mkl_context.convert_input != nullptr) {
+ if (input_in_mkl_format == false) {
+ CHECK_EQ(
+ dnnConversionExecute_F32(
+ mkl_context.convert_input,
+ static_cast<void*>(const_cast<T*>(tensor_in.flat<T>().data())),
+ mkl_context.input_buf),
+ E_SUCCESS);
+ CHECK_EQ(dnnDelete_F32(mkl_context.convert_input), E_SUCCESS);
+ } else {
+ mkl_context.input_shape.GetConvertedFlatData(
+ mkl_context.lt_prim_input,
+ static_cast<void*>(const_cast<T*>(tensor_in.flat<T>().data())),
+ mkl_context.input_buf);
+ }
+ mkl_context.pooling_res[dnnResourceSrc] = mkl_context.input_buf;
+ } else {
+ mkl_context.pooling_res[dnnResourceSrc] =
+ static_cast<void*>(const_cast<T*>(tensor_in.flat<T>().data()));
+ }
+
+ // Declare output tensor and allocate memory
+ Tensor* output = nullptr;
+ TensorShape tensor_out_shape;
+ MklShape mkl_out_shape;
+ mkl_out_shape.SetMklTensor(true);
+ mkl_out_shape.SetMklLayout(mkl_context.prim_pooling_fwd, dnnResourceDst);
+ mkl_out_shape.SetTfLayout(mkl_context.params.in_dim,
+ mkl_context.params.out_sizes,
+ mkl_context.params.out_strides);
+ mkl_out_shape.SetTfDimOrder(mkl_context.params.in_dim, data_format_);
+
+ tensor_out_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
+ mkl_out_shape.GetMklLayout())) /
+ sizeof(T));
+
+ AllocateOutputSetMklshape(context, 0, &output, tensor_out_shape,
+ mkl_out_shape);
+ mkl_context.pooling_res[dnnResourceDst] =
+ static_cast<void*>(output->flat<T>().data());
+
+ mkl_context.pooling_res[dnnResourceWorkspace] = workspace_buf;
+
+ CHECK_EQ(
+ dnnExecute_F32(mkl_context.prim_pooling_fwd, mkl_context.pooling_res),
+ E_SUCCESS);
+
+ mkl_context.MklCleanup();
+ }
+
+ private:
+ typedef struct {
+ MklPoolingOpParams params;
+ MklShape input_shape;
+ dnnPrimitive_t prim_pooling_fwd, convert_input;
+ dnnLayout_t lt_user_input, lt_prim_input, lt_workspace;
+ void* input_buf;
+ void* pooling_res[dnnResourceNumber];
+
+ void MklCreateLayoutsAndPrimitives(OpKernelContext* context,
+ Tensor* mkl_tmp_input_buf_tensor) {
+ bool input_in_mkl_format = input_shape.IsMklTensor();
+
+ if (!input_in_mkl_format) {
+ CHECK_EQ(dnnLayoutCreate_F32(&lt_user_input, params.in_dim,
+ params.in_sizes, params.in_strides),
+ E_SUCCESS);
+ } else {
+ lt_user_input = (dnnLayout_t)input_shape.GetCurLayout();
+ }
+
+ dnnAlgorithm_t algorithm = dnnAlgorithmPoolingAvg;
+ dnnPrimitiveAttributes_t primAttr = nullptr;
+
+ // Create DNN primitives
+ CHECK_EQ(dnnPoolingCreateForward_F32(
+ &prim_pooling_fwd, primAttr, algorithm, lt_user_input,
+ params.kernel_size, params.kernel_stride, params.in_offset,
+ dnnBorderZerosAsymm),
+ E_SUCCESS);
+
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+ &lt_prim_input, prim_pooling_fwd, dnnResourceSrc),
+ E_SUCCESS);
+ if (!dnnLayoutCompare_F32(lt_user_input, lt_prim_input)) {
+ CHECK_EQ(dnnConversionCreate_F32(&convert_input, lt_user_input,
+ lt_prim_input),
+ E_SUCCESS);
+
+ AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_prim_input,
+ &input_buf);
+ }
+
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_workspace, prim_pooling_fwd,
+ dnnResourceWorkspace),
+ E_SUCCESS);
+ }
+
+ void MklCleanup() {
+ bool input_in_mkl_format = input_shape.IsMklTensor();
+ if (!input_in_mkl_format) {
+ CHECK_EQ(dnnLayoutDelete_F32(lt_user_input), E_SUCCESS);
+ }
+
+ CHECK_EQ(dnnDelete_F32(prim_pooling_fwd), E_SUCCESS);
+ CHECK_EQ(dnnLayoutDelete_F32(lt_prim_input), E_SUCCESS);
+ }
+ } MklAvgPoolingOpContext;
+
+ std::vector<int32> ksize_;
+ std::vector<int32> stride_;
+ Padding padding_;
+ TensorFormat data_format_;
+};
+
+//-----------------------------------------------------------------------------
+
+template <class Device, class T>
+class MklAvgPoolingGradOp : public OpKernel {
+ public:
+ explicit MklAvgPoolingGradOp(OpKernelConstruction* context)
+ : OpKernel(context) {
+ string data_format;
+
+ OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+ OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+ errors::InvalidArgument("Invalid data format"));
+ OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
+ OP_REQUIRES(context, ksize_.size() == 4,
+ errors::InvalidArgument("Sliding window ksize field must "
+ "specify 4 dimensions"));
+ OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+ OP_REQUIRES(context, stride_.size() == 4,
+ errors::InvalidArgument("Sliding window strides field must "
+ "specify 4 dimensions"));
+ OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
+ errors::Unimplemented("Pooling is not yet supported on the "
+ "batch dimension."));
+ }
+
+ void Compute(OpKernelContext* context) override {
+ MklAvgPoolingGradOpContext mkl_context;
+ const Tensor& tensor_in_shape = MklGetInput(context, 0);
+ const Tensor& out_backprop = MklGetInput(context, 1);
+ GetMklShape(context, 1, &mkl_context.out_backprop_shape);
+ bool outbackprop_in_mkl_format =
+ mkl_context.out_backprop_shape.IsMklTensor();
+
+ TensorShape output_shape;
+ auto shape_vec = tensor_in_shape.vec<int32>();
+ for (int64 i = 0; i < tensor_in_shape.NumElements(); ++i) {
+ output_shape.AddDim(shape_vec(i));
+ }
+
+ MklPoolParameters pool_params;
+ pool_params.Init(context, ksize_, stride_, padding_, data_format_,
+ output_shape);
+
+ // Extract the parameters for the op from the pooling specs
+ ExtractMklOpParams(context, data_format_, pool_params, &mkl_context.params);
+
+ // Tensors needed to create temporary buffers
+ Tensor outbackprop_buf_tensor;
+ void* outbackprop_buf;
+ mkl_context.MklCreateLayoutsAndPrimitives(context);
+
+ // Check if outbackprop layout requires conversion.
+ if (!dnnLayoutCompare_F32(mkl_context.lt_user_outbackprop,
+ mkl_context.lt_prim_outbackprop)) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_context.convert_outbackprop,
+ mkl_context.lt_user_outbackprop,
+ mkl_context.lt_prim_outbackprop),
+ E_SUCCESS);
+
+ AllocTmpBuffer(context, &outbackprop_buf_tensor,
+ mkl_context.lt_prim_outbackprop, &outbackprop_buf);
+
+ if (!outbackprop_in_mkl_format) {
+ CHECK_EQ(dnnConversionExecute_F32(mkl_context.convert_outbackprop,
+ static_cast<void*>(const_cast<T*>(
+ out_backprop.flat<T>().data())),
+ outbackprop_buf),
+ E_SUCCESS);
+ CHECK_EQ(dnnDelete_F32(mkl_context.convert_outbackprop), E_SUCCESS);
+ } else {
+ mkl_context.out_backprop_shape.GetConvertedFlatData(
+ mkl_context.lt_prim_outbackprop,
+ static_cast<void*>(const_cast<T*>(out_backprop.flat<T>().data())),
+ outbackprop_buf);
+ }
+ mkl_context.pooling_res[dnnResourceDiffDst] = outbackprop_buf;
+ } else {
+ mkl_context.pooling_res[dnnResourceDiffDst] =
+ static_cast<void*>(const_cast<T*>(out_backprop.flat<T>().data()));
+ }
+
+ // Handle workspace requirements.
+ Tensor workspace_buf_tensor;
+ void* workspace_buf;
+ AllocTmpBuffer(context, &workspace_buf_tensor, mkl_context.lt_workspace,
+ &workspace_buf);
+ mkl_context.pooling_res[dnnResourceWorkspace] = workspace_buf;
+
+ // Handle MKL output tensor setup.
+ Tensor* output = nullptr;
+ TensorShape tensor_out_shape;
+ MklShape mkl_out_shape;
+ mkl_out_shape.SetMklTensor(true);
+ mkl_out_shape.SetMklLayout(mkl_context.prim_pooling_bwd,
+ dnnResourceDiffSrc);
+ mkl_out_shape.SetTfLayout(mkl_context.params.in_dim,
+ mkl_context.params.in_sizes,
+ mkl_context.params.in_strides);
+ mkl_out_shape.SetTfDimOrder(mkl_context.params.in_dim, data_format_);
+
+ tensor_out_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
+ mkl_out_shape.GetMklLayout())) /
+ sizeof(T));
+
+ AllocateOutputSetMklshape(context, 0, &output, tensor_out_shape,
+ mkl_out_shape);
+
+ // Set output tensor.
+ mkl_context.pooling_res[dnnResourceDiffSrc] =
+ static_cast<void*>(output->flat<T>().data());
+
+ // Execute primitive.
+ CHECK_EQ(
+ dnnExecute_F32(mkl_context.prim_pooling_bwd, mkl_context.pooling_res),
+ E_SUCCESS);
+
+ mkl_context.MklCleanup();
+ }
+
+ private:
+ typedef struct {
+ MklPoolingOpParams params;
+ MklShape out_backprop_shape;
+ dnnPrimitive_t prim_pooling_bwd, convert_outbackprop;
+ void* pooling_res[dnnResourceNumber];
+ dnnLayout_t lt_user_input, lt_user_outbackprop, lt_prim_outbackprop,
+ lt_workspace;
+
+ void MklCreateLayoutsAndPrimitives(OpKernelContext* context) {
+ const Tensor& tensor_in_shape = MklGetInput(context, 0);
+ const Tensor& out_backprop = MklGetInput(context, 1);
+ bool outbackprop_in_mkl_format = out_backprop_shape.IsMklTensor();
+
+ if (!outbackprop_in_mkl_format) {
+ // For avgpooling, tensor_in_shape should have 1 dimension, and 4
+ // elements.
+ OP_REQUIRES(
+ context,
+ tensor_in_shape.dims() == 1 && tensor_in_shape.NumElements() == 4,
+ errors::InvalidArgument("original input shape must be "
+ "1-dimensional and 4 elements"));
+
+ // For avgpooling, out_backprop should have 4 dimensions.
+ OP_REQUIRES(context, out_backprop.dims() == 4,
+ errors::InvalidArgument("out_backprop must be "
+ "4-dimensional"));
+ } else {
+ // Input in MKL format.
+ OP_REQUIRES(
+ context, out_backprop.dims() == 2,
+ errors::InvalidArgument("out_backprop in MKL format must be "
+ "2-dimensional"));
+
+ // For avgpooling, out_backprop should have 4 dimensions.
+ OP_REQUIRES(context, out_backprop_shape.GetDimension() == 4,
+ errors::InvalidArgument("out_backprop must be "
+ "4-dimensional"));
+ }
+
+ // TODO(inteltf): Get outbackprop layout.
+ // Do we need to create layout in every invocation?
+ if (!outbackprop_in_mkl_format) {
+ CHECK_EQ(dnnLayoutCreate_F32(&lt_user_outbackprop, params.in_dim,
+ params.out_sizes, params.out_strides),
+ E_SUCCESS);
+ } else {
+ lt_user_outbackprop = (dnnLayout_t)out_backprop_shape.GetCurLayout();
+ }
+
+ // Create the backward primitive
+ // Create DNN user layout
+ CHECK_EQ(dnnLayoutCreate_F32(&lt_user_input, params.in_dim,
+ params.in_sizes, params.in_strides),
+ E_SUCCESS);
+
+ // Create PoolingBackward primitive
+ dnnAlgorithm_t algorithm = dnnAlgorithmPoolingAvg;
+ dnnPrimitiveAttributes_t primAttr = nullptr;
+ CHECK_EQ(dnnPoolingCreateBackward_F32(
+ &prim_pooling_bwd, primAttr, algorithm, lt_user_input,
+ params.kernel_size, params.kernel_stride, params.in_offset,
+ dnnBorderZerosAsymm),
+ E_SUCCESS);
+
+ // Create expected outbackprop layout from the primitive.
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+ &lt_prim_outbackprop, prim_pooling_bwd, dnnResourceDiffDst),
+ E_SUCCESS);
+
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_workspace, prim_pooling_bwd,
+ dnnResourceWorkspace),
+ E_SUCCESS);
+ }
+
+ void MklCleanup() {
+ bool outbackprop_in_mkl_format = out_backprop_shape.IsMklTensor();
+ CHECK_EQ(dnnDelete_F32(prim_pooling_bwd), E_SUCCESS);
+ CHECK_EQ(dnnLayoutDelete_F32(lt_user_input), E_SUCCESS);
+ if (!outbackprop_in_mkl_format) {
+ CHECK_EQ(dnnLayoutDelete_F32(lt_user_outbackprop), E_SUCCESS);
+ }
+ CHECK_EQ(dnnLayoutDelete_F32(lt_prim_outbackprop), E_SUCCESS);
+ CHECK_EQ(dnnLayoutDelete_F32(lt_workspace), E_SUCCESS);
+ }
+ } MklAvgPoolingGradOpContext;
+
+ std::vector<int32> ksize_;
+ std::vector<int32> stride_;
+ Padding padding_;
+ TensorFormat data_format_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("MklAvgPool")
+ .Device(DEVICE_CPU)
+ .TypeConstraint<float>("T")
+ .Label(mkl_layer_registry::kMklLayerLabel),
+ MklAvgPoolingOp<CPUDevice, float>);
+
+REGISTER_KERNEL_BUILDER(Name("MklAvgPoolGrad")
+ .Device(DEVICE_CPU)
+ .TypeConstraint<float>("T")
+ .Label(mkl_layer_registry::kMklLayerLabel),
+ MklAvgPoolingGradOp<CPUDevice, float>);
+
+} // namespace tensorflow
+#endif // INTEL_MKL
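
Note: both Compute() paths in this new file repeat the same legacy MKL DNN (mkl_dnn.h) idiom: derive the layout the primitive expects from the primitive itself, compare it against the user-supplied layout, and insert a conversion only when the two differ. The standalone sketch below illustrates that idiom outside the kernel; it is not part of the commit. The helper name PrepareSrc is hypothetical, error checks on the dnn*_F32 return codes are omitted for brevity (the kernel wraps each call in CHECK_EQ(..., E_SUCCESS)), and it assumes the legacy dnnAllocateBuffer_F32 allocator in place of the TensorFlow AllocTmpBuffer helper.

// Minimal sketch (assumption: legacy MKL DNN C API from mkl_dnn.h is available).
// Shows the compare-and-convert pattern used for dnnResourceSrc above.
#include "mkl_dnn.h"

static void* PrepareSrc(dnnPrimitive_t pooling_fwd, dnnLayout_t lt_user,
                        void* user_data, void** tmp_buf /* caller releases */) {
  dnnLayout_t lt_prim = nullptr;
  // Layout the forward pooling primitive expects for its source resource.
  dnnLayoutCreateFromPrimitive_F32(&lt_prim, pooling_fwd, dnnResourceSrc);
  void* src = user_data;
  if (!dnnLayoutCompare_F32(lt_user, lt_prim)) {
    // Layouts differ: allocate a buffer in the primitive's layout and convert.
    dnnPrimitive_t cv = nullptr;
    dnnConversionCreate_F32(&cv, lt_user, lt_prim);
    dnnAllocateBuffer_F32(tmp_buf, lt_prim);
    dnnConversionExecute_F32(cv, user_data, *tmp_buf);
    dnnDelete_F32(cv);
    src = *tmp_buf;
  }
  dnnLayoutDelete_F32(lt_prim);
  return src;  // fed to the primitive as pooling_res[dnnResourceSrc]
}

The backward op applies the same pattern to dnnResourceDiffDst before executing prim_pooling_bwd.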