diff options
Diffstat (limited to 'tensorflow/core/kernels/mkl_lrn_op.cc')
-rw-r--r-- | tensorflow/core/kernels/mkl_lrn_op.cc | 112 |
1 files changed, 65 insertions, 47 deletions
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc index 07a7e6b5da..070aeff49f 100644 --- a/tensorflow/core/kernels/mkl_lrn_op.cc +++ b/tensorflow/core/kernels/mkl_lrn_op.cc @@ -22,6 +22,9 @@ limitations under the License. #define EIGEN_USE_THREADS #include <vector> +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "third_party/mkl/include/mkl_dnn.h" +#include "third_party/mkl/include/mkl_dnn_types.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -30,9 +33,6 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/tensor_format.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "third_party/mkl/include/mkl_dnn.h" -#include "third_party/mkl/include/mkl_dnn_types.h" #if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/util/work_sharder.h" @@ -66,10 +66,11 @@ class MklLRNOp : public OpKernel { explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) { int64 depth_radius64; OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64)); - OP_REQUIRES(context, FastBoundsCheck(depth_radius64, - std::numeric_limits<int>::max()), - errors::InvalidArgument("depth_radius = ", depth_radius64, - " larger than int max")); + OP_REQUIRES( + context, + FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()), + errors::InvalidArgument("depth_radius = ", depth_radius64, + " larger than int max")); depth_radius_ = static_cast<size_t>(depth_radius64); OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_)); @@ -92,9 +93,10 @@ class MklLRNOp : public OpKernel { : input.dims(); OP_REQUIRES(context, mkl_context.in_dims == 4, errors::InvalidArgument("input must be 4-dimensional")); - OP_REQUIRES(context, FastBoundsCheck(input.NumElements(), - 
std::numeric_limits<int>::max()), - errors::InvalidArgument("argument to LRN too large")); + OP_REQUIRES( + context, + FastBoundsCheck(input.NumElements(), std::numeric_limits<int>::max()), + errors::InvalidArgument("argument to LRN too large")); if (!input_in_mkl_format) { mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_, @@ -102,6 +104,15 @@ class MklLRNOp : public OpKernel { return; } + // TODO(inteltf) MKL will support depth radius not equal to 2 in the future + if (depth_radius_ != 2) { + Tensor converted_tensor = + ConvertMklToTF<T>(context, input, mkl_context.input_shape); + mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_, + beta_, converted_tensor); + return; + } + if (input_in_mkl_format) { // MKL supports normalization over channel dimension only if (mkl_context.input_shape.tf_dim_idx(mkl_context.in_dims - 1) == @@ -334,10 +345,11 @@ class MklLRNGradOp : public OpKernel { explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) { int64 depth_radius64; OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64)); - OP_REQUIRES(context, FastBoundsCheck(depth_radius64, - std::numeric_limits<int>::max()), - errors::InvalidArgument("depth_radius = ", depth_radius64, - " larger than int max")); + OP_REQUIRES( + context, + FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()), + errors::InvalidArgument("depth_radius = ", depth_radius64, + " larger than int max")); depth_radius_ = static_cast<int>(depth_radius64); OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_)); OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); @@ -541,9 +553,6 @@ class MklLRNGradOp : public OpKernel { CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_bdw_input, lrn_bwd, dnnResourceDiffDst), E_SUCCESS); - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&lt_internal_input, lrn_bwd, - dnnResourceSrc), - E_SUCCESS); bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor(); if (ingrad_in_mkl_format) { @@ -572,37 
+581,44 @@ class MklLRNGradOp : public OpKernel { } } - bool inimage_in_mkl_format = inimage_shape.IsMklTensor(); - if (inimage_in_mkl_format) { - if (!dnnLayoutCompare_F32( - lt_internal_input, - static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()))) { - AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input, - &res_lrn_bwd[dnnResourceSrc]); - ingrad_shape.GetConvertedFlatData(lt_internal_input, user_fwd_input, - res_lrn_bwd[dnnResourceSrc]); - } else { - res_lrn_bwd[dnnResourceSrc] = user_fwd_input; - } +// Although MKL documentation for LRN does not specify setting/getting +// of dnnResourceSrc and dnnResourceDst, Caffe code sets dnnResourceSrc. +// So we set dnnResourceSrc here. But we do not know why we are setting +// dnnResourceDst. +#if 0 + // NOTE: The code below is kept just so that we know how we should handle + // dnnResourceSrc if the primitive layout for dnnResourceSrc was supported. + + if (!dnnLayoutCompare_F32(lt_internal_input, + static_cast<dnnLayout_t>inimage_shape.GetCurLayout())) { + AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input, + &res_lrn_bwd[dnnResourceSrc]); + inimage_shape.GetConvertedFlatData(lt_internal_input, + user_fwd_input, + res_lrn_bwd[dnnResourceSrc]); + } else { + res_lrn_bwd[dnnResourceSrc] = user_fwd_input; + } +#endif + + // Since we cannot get expected layout for dnnResourceSrc, we construct + // buffer using + // MKL format if input is in MKL format. 
+ if (inimage_shape.IsMklTensor()) { + AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, + (dnnLayout_t)inimage_shape.GetCurLayout(), + &res_lrn_bwd[dnnResourceSrc]); } else { - if (!dnnLayoutCompare_F32( - lt_internal_input, - static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()))) { - CHECK_EQ(dnnConversionCreate_F32( - &convert_input, - static_cast<dnnLayout_t>(inimage_shape.GetCurLayout()), - lt_internal_input), - E_SUCCESS); + res_lrn_bwd[dnnResourceSrc] = user_fwd_input; + } - AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input, - &res_lrn_bwd[dnnResourceSrc]); - CHECK_EQ(dnnConversionExecute_F32(convert_input, user_fwd_input, - res_lrn_bwd[dnnResourceSrc]), - E_SUCCESS); - dnnDelete_F32(convert_input); - } else { - res_lrn_bwd[dnnResourceSrc] = user_fwd_input; - } + // Same comment as above. + if (outimage_shape.IsMklTensor()) { + AllocTmpBuffer(context, mkl_tmp_outimage_buf_tensor, + (dnnLayout_t)outimage_shape.GetCurLayout(), + &res_lrn_bwd[dnnResourceDst]); + } else { + res_lrn_bwd[dnnResourceDst] = user_fwd_output; } res_lrn_bwd[dnnResourceWorkspace] = workspace_buffer; @@ -612,6 +628,8 @@ class MklLRNGradOp : public OpKernel { // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a // copy. void MklDefaultToEigen(OpKernelContext* context) { + // CHECK(false); + Tensor in_grads; Tensor in_image; Tensor out_image; @@ -691,7 +709,7 @@ class MklLRNGradOp : public OpKernel { Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch, depth * depth, shard); } - + // release mkl resources void Mklcleanup() { bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor(); |