author    Niranjan Hasabnis <niranjan.hasabnis@intel.com>    2018-01-16 21:48:52 -0800
committer Gunhan Gulsoy <gunan@google.com>    2018-01-16 21:48:52 -0800
commit    ae700bb7462ee1bd4fed3c89441e962f64c89afd (patch)
tree      f509264b90a6ec86b39f08134e1ce3efcc2deea1 /tensorflow/core
parent    fa0dd4436f88425639fcd589ce3261249c6499be (diff)
Fixes for various MKLDNN unit test failures (#16059)
1. MklLayout pass changes: make the workspace type uint8 for MaxPool and handle duplicate control-edge insertion.
   1) Handle the case of inserting a duplicate control edge (fixes the Mkl layout graph pass unit test).
   2) Enable uint8 as the workspace tensor type (consistent with LRN workspace handling). The workspace type change is also applied in the MaxPool and MaxPoolGrad operators.
2. Handle the MklReshape failing case. MklReshape was failing a unit test when the Mkl layout and the TensorFlow layout of the input tensor were the same, but the shapes of the input and output tensors differed. No reorder is required in that case, but a reshape is still needed; before this fix, we asserted that a reorder was performed.
3. Add support for empty input/filter tensors in the Convolution backprop operators.
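Background for item 1 (not part of the commit): Graph::AddControlEdge(src, dst, allow_duplicates) in tensorflow/core/graph/graph.h returns nullptr when allow_duplicates is false and an equivalent control edge already exists, so the CHECK_NOTNULL wrappers in the rewiring loops below abort the second time the same edge is requested. A minimal sketch of the pattern the pass now uses (helper name is illustrative):

#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/platform/logging.h"

// Adds src -> dst as a control edge. With allow_duplicates=true the call
// returns a valid Edge* even if such an edge already exists (another copy is
// added), so CHECK_NOTNULL no longer fires when a rewiring loop visits the
// same (src, dst) pair more than once.
void AddControlEdgeAllowingDuplicates(tensorflow::Graph* g,
                                      tensorflow::Node* src,
                                      tensorflow::Node* dst) {
  const tensorflow::Edge* e =
      g->AddControlEdge(src, dst, /*allow_duplicates=*/true);
  CHECK_NOTNULL(e);
}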
Diffstat (limited to 'tensorflow/core')
-rw-r--r--  tensorflow/core/graph/mkl_layout_pass.cc              54
-rw-r--r--  tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc    7
-rw-r--r--  tensorflow/core/kernels/mkl_conv_grad_input_ops.cc     7
-rw-r--r--  tensorflow/core/kernels/mkl_conv_ops.cc                 7
-rw-r--r--  tensorflow/core/kernels/mkl_conv_ops.h                 29
-rw-r--r--  tensorflow/core/kernels/mkl_maxpooling_op.cc           14
-rw-r--r--  tensorflow/core/kernels/mkl_pooling_ops_common.h        4
-rw-r--r--  tensorflow/core/kernels/mkl_reshape_op.cc              15
-rw-r--r--  tensorflow/core/ops/nn_ops.cc                           8
9 files changed, 117 insertions, 28 deletions
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 986dadb272..55bc401b9d 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -3117,7 +3117,9 @@ void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
Node* orig_input0 = nullptr;
TF_CHECK_OK(orig_node->input_node(0,
const_cast<const Node**>(&orig_input0)));
- CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out, true));
}
(*out)->set_assigned_device_name(orig_node->assigned_device_name());
@@ -3382,8 +3384,8 @@ void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
// We use a tensor of shape {1} and value 0 to represent
// dummy float tensor. We need this as a dummy workspace tensor.
- // Workspace tensor has type float.
- const DataType dt = DataTypeToEnum<float>::v();
+ // Workspace tensor has type uint8.
+ const DataType dt = DataTypeToEnum<uint8>::v();
TensorProto proto;
proto.set_dtype(dt);
float zero[1] = {0};
@@ -3413,7 +3415,9 @@ void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
Node* orig_input0 = nullptr;
TF_CHECK_OK(orig_node->input_node(0,
const_cast<const Node**>(&orig_input0)));
- CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out, true));
}
(*out)->set_assigned_device_name(orig_node->assigned_device_name());
@@ -3863,12 +3867,16 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g,
// node are already copied in BuildNode. We handle control edges now.
for (const Edge* e : pred->in_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
}
}
for (const Edge* e : succ->in_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
}
}
@@ -3876,14 +3884,18 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g,
// First, we will fix outgoing control edges from 'pred' node.
for (const Edge* e : pred->out_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
}
}
// Second, we will fix outgoing control and data edges from 'succ' node.
for (const Edge* e : succ->out_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
} else {
// BiasAdd has only 1 output (at slot 0) and merged node also has only 1
// output (at slot 0).
@@ -3966,12 +3978,16 @@ Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
// edges now.
for (const Edge* e : badd->in_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
}
}
for (const Edge* e : fltr->in_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
}
}
@@ -3987,7 +4003,9 @@ Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
for (const Edge* e : badd->out_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
} else {
CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeBiasGradOutputIdx,
e->dst(), e->dst_input()));
@@ -3997,7 +4015,11 @@ Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
// Second, we will fix outgoing control and data edges from 'fltr' node.
for (const Edge* e : fltr->out_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+ // We allow duplicate edge for this case since we already add control
+ // edge from new_node in line 3990. Line below could be adding same
+ // edge to same destination again. In such case, if we do not allow
+ // duplicate edge, then this call will fail.
+ CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
} else {
CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeFilterGradOutputIdx,
e->dst(), e->dst_input()));
@@ -4091,7 +4113,9 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
// already copied in BuildNode. We need to handle control edges now.
for (const Edge* e : orig_node->in_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
}
}
@@ -4104,7 +4128,9 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
// GetTensorDataIndex provides this mapping function.
for (const Edge* e : orig_node->out_edges()) {
if (e->IsControlEdge()) {
- CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+ // Allow duplicate while adding control edge as it would fail (return
+ // NULL) if we try to add duplicate edge.
+ CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
} else {
CHECK_NOTNULL((*g)->AddEdge(new_node, GetTensorDataIndex(e->src_output(),
e->src()->num_outputs()),
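Related to the GetDummyWorkspaceTensorNode change above: the dummy workspace fed to rewritten MaxPool nodes is now declared as uint8 rather than float. A standalone sketch of what such a one-element uint8 constant amounts to (illustrative only; the pass itself builds the TensorProto field by field):

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"

// One-element uint8 tensor with value 0, serialized to a TensorProto; this is
// what a dummy workspace input for a rewritten _MklMaxPool node boils down to.
tensorflow::TensorProto MakeDummyUint8Workspace() {
  tensorflow::Tensor t(tensorflow::DT_UINT8, tensorflow::TensorShape({1}));
  t.flat<tensorflow::uint8>()(0) = 0;
  tensorflow::TensorProto proto;
  t.AsProtoTensorContent(&proto);
  return proto;
}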
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index 793fa24d99..54d4916d49 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -467,6 +467,13 @@ class MklConv2DCustomBackpropFilterOp :
return filter_tf_shape;
}
+ TensorShape GetOutputTfShape(const TensorShape& input_shape,
+ const TensorShape& filter_shape,
+ const TensorShape& outbprop_shape) {
+ // Shape of output of Conv2DBackpropFilter is same as shape of filter.
+ return filter_shape;
+ }
+
const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
const memory::dims& fwd_filter_dims) {
// Shape of output of Conv2DBackpropFilter is same as shape of filter.
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index db9e97e7ca..ef6db58d31 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -396,6 +396,13 @@ class MklConv2DCustomBackpropInputOp :
return GetTfShape(context, kInputIndex_Filter);
}
+ TensorShape GetOutputTfShape(const TensorShape& input_shape,
+ const TensorShape& filter_shape,
+ const TensorShape& outbprop_shape) {
+ // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
+ return input_shape;
+ }
+
const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
const memory::dims& fwd_filter_dims) {
// Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index a4e139bb54..0e77b45993 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -551,6 +551,13 @@ class MklConv2DOp : public OpKernel {
output_mkl_shape.SetMklTensor(false);
AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor,
src_tf_shape, output_mkl_shape);
+
+ // MklConv2D also outputs converted filter as 2nd output of Conv2D.
+ filter_mkl_shape.SetMklTensor(false);
+ Tensor* output_filter_tensor = nullptr;
+ AllocateOutputSetMklShape(context, kOutputIndex_Filter,
+ &output_filter_tensor,
+ filter_tf_shape, filter_mkl_shape);
return;
}
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index b6883dbaa2..c6456bd5c3 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -390,6 +390,29 @@ class MklConv2DBackpropCommonOp : public OpKernel {
TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor);
TensorShape outbprop_tf_shape = GetTfShape(context, kOutbpropIdx);
+ // Corner cases: output with 0 elements and 0 batch size.
+ Tensor* output_tensor = nullptr;
+ if (input_tf_shape.num_elements() == 0 ||
+ filter_tf_shape.num_elements() == 0 ||
+ outbprop_tf_shape.num_elements() == 0) {
+ MklDnnShape output_mkl_shape;
+ output_mkl_shape.SetMklTensor(false);
+ TensorShape output_tf_shape = GetOutputTfShape(input_tf_shape,
+ filter_tf_shape,
+ outbprop_tf_shape);
+ const int kOutputIdx = 0;
+ AllocateOutputSetMklShape(context, kOutputIdx, &output_tensor,
+ output_tf_shape, output_mkl_shape);
+ CHECK_NOTNULL(output_tensor);
+
+ // if output tensor has more than 0 elements, we need to 0 them out.
+ for (size_t i = 0; i < output_tf_shape.num_elements(); ++i) {
+ output_tensor->flat<T>().data()[i] = 0;
+ }
+
+ return;
+ }
+
// By default, all dims are in MKL order. Only dims in TF order
// are those with prefix tf_order.
memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims;
@@ -471,7 +494,6 @@ class MklConv2DBackpropCommonOp : public OpKernel {
output.SetOpMemDesc(bwd_output_dims, memory::format::any);
// Operator-specific call to create and execute primitive.
- Tensor* output_tensor = nullptr;
CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter,
&outbackprop, &output, &output_tensor,
strides, padding_l, padding_r,
@@ -507,6 +529,11 @@ class MklConv2DBackpropCommonOp : public OpKernel {
virtual TensorShape MakeFilterTfShape(OpKernelContext* context,
const Tensor& filter_tensor) = 0;
+ /// Get the TensorFlow shape of output tensor.
+ virtual TensorShape GetOutputTfShape(const TensorShape& input_shape,
+ const TensorShape& filter_shape,
+ const TensorShape& outbprop_shape) = 0;
+
/// Get shape of output in MKL-DNN order. Computes shape of output from
/// input shape (fwd_input_dims) and filter shape (fwd_filter_dims).
virtual
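On item 3 of the commit message: when any of the input, filter, or out-backprop tensors is empty, the common backprop class above now allocates the output with the plain TensorFlow shape (via the new GetOutputTfShape virtual) and zero-fills it before returning. A hedged sketch of an equivalent zero-fill using the Eigen setZero() idiom instead of the explicit loop in the hunk (helper name is illustrative):

#include "tensorflow/core/framework/tensor.h"

// Zero-fills an already-allocated output tensor; a no-op when the tensor has
// zero elements (e.g. batch size 0), which is exactly the corner case the
// backprop kernels now short-circuit on.
template <typename T>
void ZeroFillOutput(tensorflow::Tensor* output_tensor) {
  if (output_tensor->NumElements() > 0) {
    output_tensor->flat<T>().setZero();
  }
}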
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
index de4d7d2e72..82c5229bab 100644
--- a/tensorflow/core/kernels/mkl_maxpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -517,7 +517,7 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase<T> {
MklDnnData<T> dnn_data_input(&cpu_engine);
MklDnnData<T> dnn_data_output(&cpu_engine);
- MklDnnData<T> dnn_data_wksp(&cpu_engine);
+ MklDnnData<uint8> dnn_data_wksp(&cpu_engine);
// initialize variables for the pooling op
MklPoolParameters pool_params;
@@ -588,16 +588,16 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase<T> {
void AllocateWorkspaceTensor(OpKernelContext* context,
const pooling_forward::primitive_desc& pool_fwd_prim_desc,
- MklDnnData<T>* dnn_data_wksp) {
+ MklDnnData<uint8>* dnn_data_wksp) {
CHECK_NOTNULL(dnn_data_wksp);
Tensor* workspace_tensor = nullptr;
memory::primitive_desc workspace_pd
= pool_fwd_prim_desc.workspace_primitive_desc();
- size_t workspace_t_elems = this->GetNumTElements(workspace_pd);
+ size_t workspace_bytes = workspace_pd.get_size();
MklDnnShape workspace_mkl_shape;
workspace_mkl_shape.SetMklTensor(false);
TensorShape workspace_tf_shape;
- workspace_tf_shape.AddDim(workspace_t_elems);
+ workspace_tf_shape.AddDim(workspace_bytes);
AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace,
&workspace_tensor,
workspace_tf_shape, workspace_mkl_shape);
@@ -651,7 +651,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
if (!context->status().ok()) return;
MklDnnData<T> grad_dnn_data(&cpu_engine);
- MklDnnData<T> workspace_dnn_data(&cpu_engine);
+ MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
MklDnnData<T> output_dnn_data(&cpu_engine);
Tensor* output_tensor = nullptr;
MklPoolParameters pool_params;
@@ -770,7 +770,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
void ConfigureWorkspace(const Tensor& workspace_tensor,
memory::primitive_desc workspace_pd,
- MklDnnData<T> *workspace_dnn_data) {
+ MklDnnData<uint8> *workspace_dnn_data) {
CHECK_NOTNULL(workspace_dnn_data);
workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
@@ -811,7 +811,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
errors::InvalidArgument("Gradient must be "
"4-dimensional"));
}
- if (this->workspace_enabled_){
+ if (this->workspace_enabled_) {
// The workspace should not be an MKL tensor
OP_REQUIRES(context, workspace_mkl_shape.IsMklTensor() == false,
errors::InvalidArgument("Workspace tensor should not"
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index d33e91a15d..b974b2c59a 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -231,7 +231,7 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase<T> {
const pooling_forward::primitive_desc& pool_fwd_desc,
const MklDnnData<T>* src,
MklDnnData<T>* dst,
- MklDnnData<T>* wksp = nullptr) {
+ MklDnnData<uint8>* wksp = nullptr) {
std::vector<primitive> net;
// Create pooling primitive and add it to net
@@ -307,7 +307,7 @@ class MklPoolingBackwardOpBase : public MklPoolingOpBase<T> {
MklDnnData<T>* input_gradient_diff_dst,
MklDnnData<T>* output_diff_src,
const memory::primitive_desc& target_diff_dst_pd,
- const MklDnnData<T>* workspace = nullptr) {
+ const MklDnnData<uint8>* workspace = nullptr) {
std::vector<primitive> net;
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 11c92ebdb4..b41e529357 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -256,11 +256,18 @@ class MklReshapeOp : public OpKernel {
AllocateOutputSetMklShape(context, kOutputSlotIdx, &output_tensor,
shape_to, mkl_shape_output);
- // Insert reorder between Mkl layout and TensorFlow layout.
+ // Insert reorder between Mkl layout and TensorFlow layout if
+ // needed. If reorder is not needed but reshape is needed (since
+ // shape_from != shape_to), then we just copy input tensor to
+ // output tensor with target shape (we cannot forward Mkl layout
+ // in such case because shape has changed.)
std::vector<primitive> net;
- CHECK_EQ(dnn_data_input.CheckReorderToOpMem(output_tf_pd,
- output_tensor, &net), true);
- stream(stream::kind::eager).submit(net).wait();
+ if (dnn_data_input.CheckReorderToOpMem(output_tf_pd,
+ output_tensor, &net)) {
+ stream(stream::kind::eager).submit(net).wait();
+ } else {
+ output_tensor->CopyFrom(input_tensor, shape_to);
+ }
return;
} else {
// If dimensions that are being expanded or collapsed are
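On the MklReshape change above: CheckReorderToOpMem only appends a reorder primitive (and returns true) when the input's MKL layout differs from the requested TensorFlow layout. When it returns false the layouts already match, so the output can simply alias the input buffer under the new shape; Tensor::CopyFrom shares the underlying buffer and succeeds as long as the element counts of the two shapes agree. A minimal sketch of that fallback (illustrative wrapper, not the kernel itself):

#include "tensorflow/core/framework/tensor.h"

// Re-interprets input_tensor's buffer under shape_to without copying data.
// Returns false if the two shapes have different element counts.
bool AliasWithNewShape(const tensorflow::Tensor& input_tensor,
                       const tensorflow::TensorShape& shape_to,
                       tensorflow::Tensor* output_tensor) {
  return output_tensor->CopyFrom(input_tensor, shape_to);
}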
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 536fc7c0c1..3f72b41569 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -1818,7 +1818,11 @@ REGISTER_OP("_MklMaxPool")
.Input("input: T")
.Input("mkl_input: uint8")
.Output("output: T")
+#ifndef INTEL_MKL_DNN
.Output("workspace: T")
+#else
+ .Output("workspace: uint8")
+#endif
.Output("mkl_output: uint8")
.Output("mkl_workspace: uint8")
.SetShapeFn(shape_inference::MaxPoolShape)
@@ -1840,7 +1844,11 @@ REGISTER_OP("_MklMaxPoolGrad")
.Input("orig_input: T")
.Input("orig_output: T")
.Input("grad: T")
+#ifndef INTEL_MKL_DNN
.Input("workspace: T")
+#else
+ .Input("workspace: uint8")
+#endif
.Input("mkl_orig_input: uint8")
.Input("mkl_orig_output: uint8")
.Input("mkl_grad: uint8")