1 files changed, 65 insertions, 47 deletions
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index 07a7e6b5da..070aeff49f 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -22,6 +22,9 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 #include <vector>
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "third_party/mkl/include/mkl_dnn.h"
+#include "third_party/mkl/include/mkl_dnn_types.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -30,9 +33,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/tensor_format.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "third_party/mkl/include/mkl_dnn.h"
-#include "third_party/mkl/include/mkl_dnn_types.h"
 
 #if !defined(IS_MOBILE_PLATFORM)
 #include "tensorflow/core/util/work_sharder.h"
@@ -66,10 +66,11 @@ class MklLRNOp : public OpKernel {
   explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
     int64 depth_radius64;
     OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
-    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("depth_radius = ", depth_radius64,
-                                        " larger than int max"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+        errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                " larger than int max"));
     depth_radius_ = static_cast<size_t>(depth_radius64);
 
     OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
@@ -92,9 +93,10 @@ class MklLRNOp : public OpKernel {
                               : input.dims();
     OP_REQUIRES(context, mkl_context.in_dims == 4,
                 errors::InvalidArgument("input must be 4-dimensional"));
-    OP_REQUIRES(context, FastBoundsCheck(input.NumElements(),
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("argument to LRN too large"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(input.NumElements(), std::numeric_limits<int>::max()),
+        errors::InvalidArgument("argument to LRN too large"));
 
     if (!input_in_mkl_format) {
       mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_,
@@ -102,6 +104,15 @@ class MklLRNOp : public OpKernel {
       return;
     }
 
+    // TODO(inteltf) MKL will support depth radius not equal to 2 in the future
+    if (depth_radius_ != 2) {
+      Tensor converted_tensor =
+          ConvertMklToTF<T>(context, input, mkl_context.input_shape);
+      mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_,
+                                    beta_, converted_tensor);
+      return;
+    }
+
     if (input_in_mkl_format) {
       // MKL supports normalization over channel dimension only
       if (mkl_context.input_shape.tf_dim_idx(mkl_context.in_dims - 1) ==
@@ -334,10 +345,11 @@ class MklLRNGradOp : public OpKernel {
   explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
     int64 depth_radius64;
     OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
-    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("depth_radius = ", depth_radius64,
-                                        " larger than int max"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+        errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                " larger than int max"));
     depth_radius_ = static_cast<int>(depth_radius64);
     OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
@@ -541,9 +553,6 @@ class MklLRNGradOp : public OpKernel {
       CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_bdw_input, lrn_bwd,
                                                 dnnResourceDiffDst),
                E_SUCCESS);
-      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_internal_input, lrn_bwd,
-                                                dnnResourceSrc),
-               E_SUCCESS);
 
       bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor();
       if (ingrad_in_mkl_format) {
@@ -572,37 +581,44 @@ class MklLRNGradOp : public OpKernel {
         }
       }
 
-      bool inimage_in_mkl_format = inimage_shape.IsMklTensor();
-      if (inimage_in_mkl_format) {
-        if (!dnnLayoutCompare_F32(
-                lt_internal_input,
-                static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()))) {
-          AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input,
-                         &res_lrn_bwd[dnnResourceSrc]);
-          ingrad_shape.GetConvertedFlatData(lt_internal_input, user_fwd_input,
-                                            res_lrn_bwd[dnnResourceSrc]);
-        } else {
-          res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
-        }
+// The MKL documentation for LRN does not specify setting/getting of
+// dnnResourceSrc and dnnResourceDst. Caffe code sets dnnResourceSrc, so we
+// set dnnResourceSrc here as well. We also set dnnResourceDst below, but the
+// reason for doing so is not known.
+#if 0
+    // NOTE: The code below is kept just so that we know how we should handle
+    // dnnResourceSrc if the primitive layout for dnnResourceSrc was supported.
+
+    if (!dnnLayoutCompare_F32(lt_internal_input,
+         static_cast<dnnLayout_t>inimage_shape.GetCurLayout())) {
+      AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input,
+                     &res_lrn_bwd[dnnResourceSrc]);
+      inimage_shape.GetConvertedFlatData(lt_internal_input,
+                                           user_fwd_input,
+                                           res_lrn_bwd[dnnResourceSrc]);
+    } else {
+      res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
+    }
+#endif
+
+      // Since we cannot get the expected layout for dnnResourceSrc, we
+      // allocate the buffer using the input's current MKL layout if the
+      // input is in MKL format; otherwise we use the user buffer directly.
+      if (inimage_shape.IsMklTensor()) {
+        AllocTmpBuffer(context, mkl_tmp_image_buf_tensor,
+                       (dnnLayout_t)inimage_shape.GetCurLayout(),
+                       &res_lrn_bwd[dnnResourceSrc]);
       } else {
-        if (!dnnLayoutCompare_F32(
-                lt_internal_input,
-                static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()))) {
-          CHECK_EQ(dnnConversionCreate_F32(
-                       &convert_input,
-                       static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()),
-                       lt_internal_input),
-                   E_SUCCESS);
+        res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
+      }
 
-          AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input,
-                         &res_lrn_bwd[dnnResourceSrc]);
-          CHECK_EQ(dnnConversionExecute_F32(convert_input, user_fwd_input,
-                                            res_lrn_bwd[dnnResourceSrc]),
-                   E_SUCCESS);
-          dnnDelete_F32(convert_input);
-        } else {
-          res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
-        }
+      // Same handling as dnnResourceSrc above, applied to dnnResourceDst.
+      if (outimage_shape.IsMklTensor()) {
+        AllocTmpBuffer(context, mkl_tmp_outimage_buf_tensor,
+                       (dnnLayout_t)outimage_shape.GetCurLayout(),
+                       &res_lrn_bwd[dnnResourceDst]);
+      } else {
+        res_lrn_bwd[dnnResourceDst] = user_fwd_output;
       }
 
       res_lrn_bwd[dnnResourceWorkspace] = workspace_buffer;
@@ -612,6 +628,8 @@ class MklLRNGradOp : public OpKernel {
     // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a
     // copy.
     void MklDefaultToEigen(OpKernelContext* context) {
+      // CHECK(false);
+
       Tensor in_grads;
       Tensor in_image;
       Tensor out_image;
@@ -691,7 +709,7 @@ class MklLRNGradOp : public OpKernel {
       Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
             depth * depth, shard);
     }
-
+		
     // release mkl resources
     void Mklcleanup() {
       bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor();