Diffstat (limited to 'tensorflow/core/kernels/mkl_lrn_op.cc')
-rw-r--r--  tensorflow/core/kernels/mkl_lrn_op.cc | 112
1 file changed, 65 insertions(+), 47 deletions(-)
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index 07a7e6b5da..070aeff49f 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -22,6 +22,9 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include <vector>
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "third_party/mkl/include/mkl_dnn.h"
+#include "third_party/mkl/include/mkl_dnn_types.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
@@ -30,9 +33,6 @@ limitations under the License.
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/util/mkl_util.h"
#include "tensorflow/core/util/tensor_format.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "third_party/mkl/include/mkl_dnn.h"
-#include "third_party/mkl/include/mkl_dnn_types.h"
#if !defined(IS_MOBILE_PLATFORM)
#include "tensorflow/core/util/work_sharder.h"
@@ -66,10 +66,11 @@ class MklLRNOp : public OpKernel {
explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
- OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("depth_radius = ", depth_radius64,
- " larger than int max"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+ errors::InvalidArgument("depth_radius = ", depth_radius64,
+ " larger than int max"));
depth_radius_ = static_cast<size_t>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
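The guard above funnels the int64 attribute through FastBoundsCheck before narrowing it. For reference, a minimal sketch of what that helper does, modeled on tensorflow/core/framework/bounds_check.h (the real version also adds a branch-prediction hint):

    #include <type_traits>

    // Casting both operands to a common unsigned type folds the
    // "index >= 0" and "index < limit" checks into one comparison:
    // a negative index wraps to a huge unsigned value and fails.
    template <typename Ta, typename Tb>
    inline bool FastBoundsCheck(Ta index, Tb limit) {
      using UIndex = typename std::make_unsigned<decltype(index + limit)>::type;
      return static_cast<UIndex>(index) < static_cast<UIndex>(limit);
    }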
@@ -92,9 +93,10 @@ class MklLRNOp : public OpKernel {
: input.dims();
OP_REQUIRES(context, mkl_context.in_dims == 4,
errors::InvalidArgument("input must be 4-dimensional"));
- OP_REQUIRES(context, FastBoundsCheck(input.NumElements(),
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("argument to LRN too large"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(input.NumElements(), std::numeric_limits<int>::max()),
+ errors::InvalidArgument("argument to LRN too large"));
if (!input_in_mkl_format) {
mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_,
@@ -102,6 +104,15 @@ class MklLRNOp : public OpKernel {
return;
}
+  // TODO(inteltf): MKL does not yet support depth_radius != 2; fall back to
+  // Eigen until it does.
+ if (depth_radius_ != 2) {
+ Tensor converted_tensor =
+ ConvertMklToTF<T>(context, input, mkl_context.input_shape);
+ mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_,
+ beta_, converted_tensor);
+ return;
+ }
+
if (input_in_mkl_format) {
// MKL supports normalization over channel dimension only
if (mkl_context.input_shape.tf_dim_idx(mkl_context.in_dims - 1) ==
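The new depth_radius_ != 2 branch follows this file's usual fallback idiom: when the MKL primitive cannot handle a configuration, repack the input out of the MKL layout and run the stock Eigen kernel instead. A condensed sketch of the idiom, where LRNWithFallback is a hypothetical wrapper and ConvertMklToTF is the mkl_util.h helper used above:

    // Hypothetical rendering of the guard added in this hunk.
    template <typename T>
    void LRNWithFallback(OpKernelContext* context, const Tensor& input,
                         const MklShape& input_shape, size_t depth_radius) {
      if (depth_radius != 2) {
        // MKL's LRN primitive only handles depth_radius == 2, so convert
        // the tensor to plain TensorFlow layout and use the Eigen path.
        Tensor converted = ConvertMklToTF<T>(context, input, input_shape);
        // ... run the default (Eigen) LRN on `converted` ...
        return;
      }
      // ... build and execute the MKL LRN primitive ...
    }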
@@ -334,10 +345,11 @@ class MklLRNGradOp : public OpKernel {
explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
- OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("depth_radius = ", depth_radius64,
- " larger than int max"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+ errors::InvalidArgument("depth_radius = ", depth_radius64,
+ " larger than int max"));
depth_radius_ = static_cast<int>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
@@ -541,9 +553,6 @@ class MklLRNGradOp : public OpKernel {
CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_bdw_input, lrn_bwd,
dnnResourceDiffDst),
E_SUCCESS);
- CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_internal_input, lrn_bwd,
- dnnResourceSrc),
- E_SUCCESS);
bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor();
if (ingrad_in_mkl_format) {
@@ -572,37 +581,44 @@ class MklLRNGradOp : public OpKernel {
}
}
- bool inimage_in_mkl_format = inimage_shape.IsMklTensor();
- if (inimage_in_mkl_format) {
- if (!dnnLayoutCompare_F32(
- lt_internal_input,
- static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()))) {
- AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input,
- &res_lrn_bwd[dnnResourceSrc]);
- ingrad_shape.GetConvertedFlatData(lt_internal_input, user_fwd_input,
- res_lrn_bwd[dnnResourceSrc]);
- } else {
- res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
- }
+// Although the MKL documentation for LRN does not mention setting or
+// getting dnnResourceSrc and dnnResourceDst, the Caffe integration sets
+// dnnResourceSrc, so we set it here as well. Why dnnResourceDst must
+// also be set is not clear to us.
+#if 0
+  // NOTE: The code below is kept just so that we know how we would handle
+  // dnnResourceSrc if the primitive layout for dnnResourceSrc were
+  // supported.
+
+  if (!dnnLayoutCompare_F32(lt_internal_input,
+       static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()))) {
+ AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input,
+ &res_lrn_bwd[dnnResourceSrc]);
+ inimage_shape.GetConvertedFlatData(lt_internal_input,
+ user_fwd_input,
+ res_lrn_bwd[dnnResourceSrc]);
+ } else {
+ res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
+ }
+#endif
+
+    // Since we cannot get the expected layout for dnnResourceSrc, we
+    // construct the buffer in MKL format whenever the input is an MKL
+    // tensor.
+ if (inimage_shape.IsMklTensor()) {
+ AllocTmpBuffer(context, mkl_tmp_image_buf_tensor,
+ (dnnLayout_t)inimage_shape.GetCurLayout(),
+ &res_lrn_bwd[dnnResourceSrc]);
} else {
- if (!dnnLayoutCompare_F32(
- lt_internal_input,
- static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()))) {
- CHECK_EQ(dnnConversionCreate_F32(
- &convert_input,
- static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()),
- lt_internal_input),
- E_SUCCESS);
+ res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
+ }
- AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input,
- &res_lrn_bwd[dnnResourceSrc]);
- CHECK_EQ(dnnConversionExecute_F32(convert_input, user_fwd_input,
- res_lrn_bwd[dnnResourceSrc]),
- E_SUCCESS);
- dnnDelete_F32(convert_input);
- } else {
- res_lrn_bwd[dnnResourceSrc] = user_fwd_input;
- }
+    // The same reasoning as above applies to dnnResourceDst.
+ if (outimage_shape.IsMklTensor()) {
+ AllocTmpBuffer(context, mkl_tmp_outimage_buf_tensor,
+ (dnnLayout_t)outimage_shape.GetCurLayout(),
+ &res_lrn_bwd[dnnResourceDst]);
+ } else {
+ res_lrn_bwd[dnnResourceDst] = user_fwd_output;
}
res_lrn_bwd[dnnResourceWorkspace] = workspace_buffer;
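With every slot of the resource array populated as above, the backward primitive consumes them in one call. A minimal sketch of that final step, using the surrounding code's names:

    void* res_lrn_bwd[dnnResourceNumber] = {};
    // ... dnnResourceSrc, dnnResourceDiffDst, dnnResourceDst and
    // dnnResourceWorkspace filled in as shown above, plus an output
    // buffer in dnnResourceDiffSrc ...
    CHECK_EQ(dnnExecute_F32(lrn_bwd, res_lrn_bwd), E_SUCCESS);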
@@ -612,6 +628,8 @@ class MklLRNGradOp : public OpKernel {
// TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a
// copy.
void MklDefaultToEigen(OpKernelContext* context) {
+ // CHECK(false);
+
Tensor in_grads;
Tensor in_image;
Tensor out_image;
@@ -691,7 +709,7 @@ class MklLRNGradOp : public OpKernel {
Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
depth * depth, shard);
}
-
+
// release mkl resources
void Mklcleanup() {
bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor();
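The Shard call in the Eigen fallback above splits nodes * batch work units of roughly depth * depth cost each across the intra-op thread pool. Its contract, per tensorflow/core/util/work_sharder.h, is roughly:

    // Each invocation of the closure receives a half-open [begin, end)
    // range of work units; cost_per_unit (estimated cycles per unit)
    // controls how finely Shard splits the range across threads.
    auto shard = [&](int64 begin, int64 end) {
      for (int64 i = begin; i < end; ++i) {
        // ... compute the gradient contribution of unit i ...
      }
    };
    Shard(worker_threads.num_threads, worker_threads.workers,
          nodes * batch /* total */, depth * depth /* cost_per_unit */, shard);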