author     A. Unique TensorFlower <gardener@tensorflow.org>   2018-01-26 11:59:56 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>    2018-01-26 12:03:52 -0800
commit     0f65c8f572201f8838189f3e3c3e455759112c14
tree       55d3ad38ebe720d5c3edb8d0722b55d4cd778e3e /tensorflow/core/kernels/mkl_lrn_op.cc
parent     ff6463f4277f412b98d6e6bb1283841ff66902de
Cleanup: Ran clang-format on all *.{cc,h} files in tensorflow/core/kernels.
PiperOrigin-RevId: 183423961
Diffstat (limited to 'tensorflow/core/kernels/mkl_lrn_op.cc')
-rw-r--r--  tensorflow/core/kernels/mkl_lrn_op.cc | 647
1 file changed, 305 insertions(+), 342 deletions(-)
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index 95e0404ba8..a8b45004b7 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -22,6 +22,9 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <vector>
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -30,9 +33,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/tensor_format.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
 
 #if !defined(IS_MOBILE_PLATFORM)
 #include "tensorflow/core/util/work_sharder.h"
@@ -40,10 +40,10 @@ limitations under the License.
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
-using mkldnn::lrn_forward;
+using mkldnn::lrn_across_channels;
 using mkldnn::lrn_backward;
+using mkldnn::lrn_forward;
 using mkldnn::prop_kind;
-using mkldnn::lrn_across_channels;
 using mkldnn::stream;
 #endif
 
@@ -77,10 +77,11 @@ class MklLRNOp : public OpKernel {
   explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
     int64 depth_radius64;
     OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
-    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("depth_radius = ", depth_radius64,
-                                        " larger than int max"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+        errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                " larger than int max"));
     depth_radius_ = static_cast<size_t>(depth_radius64);
 
     OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
@@ -103,9 +104,10 @@ class MklLRNOp : public OpKernel {
                               : input.dims();
     OP_REQUIRES(context, mkl_context.in_dims == 4,
                 errors::InvalidArgument("input must be 4-dimensional"));
-    OP_REQUIRES(context, FastBoundsCheck(input.NumElements(),
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("argument to LRN too large"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(input.NumElements(), std::numeric_limits<int>::max()),
+        errors::InvalidArgument("argument to LRN too large"));
 
     if (!input_in_mkl_format) {
       mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_,
@@ -339,17 +341,17 @@ class MklLRNOp : public OpKernel {
   float beta_;
 };
 
-
 template <typename T>
 class MklLRNGradOp : public OpKernel {
  public:
   explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
     int64 depth_radius64;
     OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
-    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("depth_radius = ", depth_radius64,
-                                        " larger than int max"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+        errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                " larger than int max"));
     depth_radius_ = static_cast<int>(depth_radius64);
     OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
@@ -740,10 +742,11 @@ class MklLRNOp : public OpKernel {
   explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
     int64 depth_radius64;
    OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
-    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("depth_radius = ", depth_radius64,
-                                        " larger than int max"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+        errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                " larger than int max"));
     depth_radius_ = static_cast<size_t>(depth_radius64);
 
     OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
@@ -773,10 +776,10 @@ class MklLRNOp : public OpKernel {
       if (!src_dnn_shape.IsMklTensor()) {
         MklDefaultToEigen(context, src_tensor);
         return;
-      } else if (!src_dnn_shape.IsMklChannelDim(
-                     src_dnn_shape.GetDimension() - 1) ) {
+      } else if (!src_dnn_shape.IsMklChannelDim(src_dnn_shape.GetDimension() -
+                                                1)) {
         Tensor converted_tensor =
-          ConvertMklToTF<T>(context, src_tensor, src_dnn_shape);
+            ConvertMklToTF<T>(context, src_tensor, src_dnn_shape);
         MklDefaultToEigen(context, converted_tensor);
         return;
       }
@@ -807,18 +810,16 @@ class MklLRNOp : public OpKernel {
       // Create LRN primitive descriptor.
       // Tensorflow's normalization semantics is across channels.
      // MKL-DNN also supports normalization within channel.
-      auto lrn_desc = lrn_forward::desc(prop_kind::forward,
-                                        lrn_across_channels,
+      auto lrn_desc = lrn_forward::desc(prop_kind::forward, lrn_across_channels,
                                         src_dnn_data.GetUsrMemDesc(),
-                                        kernel_size,
-                                        new_alpha, beta_, bias_);
+                                        kernel_size, new_alpha, beta_, bias_);
       auto lrn_prim_desc = lrn_forward::primitive_desc(lrn_desc, cpu_engine);
 
       // Allocate output_dnn_data tensor.
       Tensor* output_tensor = nullptr;
       memory::format input_format = src_dnn_shape.GetTfDataFormat();
-      AllocateOutputTensor(context, lrn_prim_desc, input_dims,
-                           input_format, &output_tensor);
+      AllocateOutputTensor(context, lrn_prim_desc, input_dims, input_format,
+                           &output_tensor);
       OP_REQUIRES_OK(context, context->status());
       CHECK_NOTNULL(output_tensor);
       dst_dnn_data.SetUsrMemDataHandle(output_tensor);
@@ -827,25 +828,23 @@ class MklLRNOp : public OpKernel {
       AllocateWorkspaceTensor(context, lrn_prim_desc, &workspace_dnn_data);
       OP_REQUIRES_OK(context, context->status());
 
-      PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data,
-                           &dst_dnn_data, &workspace_dnn_data);
-    } catch (mkldnn::error &e) {
+      PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data, &dst_dnn_data,
+                           &workspace_dnn_data);
+    } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) +
-                         ", in file " + string(__FILE__) + ":" +
-                         std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-                     errors::Aborted("Operation received an exception:",
-                                     error_msg));
+                         ", message: " + string(e.message) + ", in file " +
+                         string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
  private:
-  void PrepareAndExecuteNet(
-      const lrn_forward::primitive_desc& lrn_fwd_desc,
-      MklDnnData<T>* src_dnn_data,
-      MklDnnData<T>* dst_dnn_data,
-      MklDnnData<uint8>* wksp_dnn_data = nullptr) {
+  void PrepareAndExecuteNet(const lrn_forward::primitive_desc& lrn_fwd_desc,
+                            MklDnnData<T>* src_dnn_data,
+                            MklDnnData<T>* dst_dnn_data,
+                            MklDnnData<uint8>* wksp_dnn_data = nullptr) {
     std::vector<primitive> net;
 
     // Check for input reorder
@@ -853,23 +852,21 @@ class MklLRNOp : public OpKernel {
 
     // Create pooling primitive and add it to net
     if (wksp_dnn_data != nullptr) {
-      net.push_back(lrn_forward(lrn_fwd_desc,
-                                src_dnn_data->GetOpMem(),
-                                wksp_dnn_data->GetOpMem(),
-                                dst_dnn_data->GetOpMem()));
+      net.push_back(lrn_forward(lrn_fwd_desc, src_dnn_data->GetOpMem(),
+                                wksp_dnn_data->GetOpMem(),
+                                dst_dnn_data->GetOpMem()));
     } else {
-      net.push_back(lrn_forward(lrn_fwd_desc,
-                                src_dnn_data->GetOpMem(),
-                                dst_dnn_data->GetOpMem()));
+      net.push_back(lrn_forward(lrn_fwd_desc, src_dnn_data->GetOpMem(),
+                                dst_dnn_data->GetOpMem()));
     }
     stream(stream::kind::eager).submit(net).wait();
   }
 
-  void AllocateOutputTensor(OpKernelContext* context,
-      const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
-      const memory::dims output_dims_mkl_order,
-      const memory::format& output_tf_format,
-      Tensor** output_tensor) {
+  void AllocateOutputTensor(
+      OpKernelContext* context,
+      const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+      const memory::dims output_dims_mkl_order,
+      const memory::format& output_tf_format, Tensor** output_tensor) {
     CHECK_NOTNULL(output_tensor);
     memory::primitive_desc dst_pd = lrn_fwd_prim_desc.dst_primitive_desc();
 
@@ -880,111 +877,106 @@ class MklLRNOp : public OpKernel {
     output_mkl_shape.SetMklLayout(&dst_pd);
     output_mkl_shape.SetElemType(MklDnnType<T>());
     output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
-                                 output_dims_mkl_order,
-                                 output_tf_format);
+                                 output_dims_mkl_order, output_tf_format);
     TensorShape output_tf_shape;
     // only allocate enough space for the elements we need.
     size_t num_bytes = dst_pd.get_size();
     CHECK_EQ(num_bytes % sizeof(T), 0);
     output_tf_shape.AddDim(num_bytes / sizeof(T));
-    AllocateOutputSetMklShape(context, kIdxOutput,
-                              output_tensor,
-                              output_tf_shape, output_mkl_shape);
+    AllocateOutputSetMklShape(context, kIdxOutput, output_tensor,
+                              output_tf_shape, output_mkl_shape);
   }
 
-    // Fallback implementation - Taken from lrn_op.cc
-    // TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a
-    // copy.
-    void MklDefaultToEigen(OpKernelContext* context,
-                           const Tensor& input) {
-      const int batch = static_cast<int>(input.dim_size(0));
-      const int rows = static_cast<int>(input.dim_size(1));
-      const int cols = static_cast<int>(input.dim_size(2));
-      const int depth = static_cast<int>(input.dim_size(3));
-      const int nodes = cols * rows;
-
-      auto in_shaped = input.shaped<T, 2>({nodes * batch, depth});
-      // Multiplying the input with the band matrix has the effect of reducing
-      // the
-      // correct patch along the depth.
-      Eigen::Tensor<T, 2, Eigen::RowMajor> multiplier(depth, depth);
-      GetBandMatrix<T>(depth, depth_radius_, &multiplier);
-
-      Tensor *output_dnn_data = nullptr;
-      MklDnnShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      mkl_output_mkl_shape.SetDimensions(4);
-      AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
-                                input.shape(), mkl_output_mkl_shape);
-      CHECK_NOTNULL(output_dnn_data);
-
-      Tensor* workspace_tensor = nullptr;
-      MklDnnShape workspace_mkl_shape;
-      workspace_mkl_shape.SetMklTensor(false);
-      TensorShape workspace_tf_shape;
-      workspace_tf_shape.AddDim(0);
-      AllocateOutputSetMklShape(context, kIdxWorkspace,
-                                &workspace_tensor,
-                                workspace_tf_shape, workspace_mkl_shape);
-      CHECK_NOTNULL(workspace_tensor);
-
-      auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
-      Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
-      auto tmp = in_shaped.square().contract(multiplier, dims) * alpha_ + bias_;
-      if (beta_ == T(1)) {
-        out_shaped.device(context->eigen_cpu_device()) =
-            in_shaped * tmp.inverse();
-      } else if (beta_ == T(0.5)) {
-        out_shaped.device(context->eigen_cpu_device()) =
-            in_shaped * tmp.rsqrt();
-      } else {
-        out_shaped.device(context->eigen_cpu_device()) =
-            in_shaped * (tmp.log() * -beta_).exp();
-      }
-    }
+  // Fallback implementation - Taken from lrn_op.cc
+  // TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a
+  // copy.
+  void MklDefaultToEigen(OpKernelContext* context, const Tensor& input) {
+    const int batch = static_cast<int>(input.dim_size(0));
+    const int rows = static_cast<int>(input.dim_size(1));
+    const int cols = static_cast<int>(input.dim_size(2));
+    const int depth = static_cast<int>(input.dim_size(3));
+    const int nodes = cols * rows;
+
+    auto in_shaped = input.shaped<T, 2>({nodes * batch, depth});
+    // Multiplying the input with the band matrix has the effect of reducing
+    // the
+    // correct patch along the depth.
+    Eigen::Tensor<T, 2, Eigen::RowMajor> multiplier(depth, depth);
+    GetBandMatrix<T>(depth, depth_radius_, &multiplier);
+
+    Tensor* output_dnn_data = nullptr;
+    MklDnnShape mkl_output_mkl_shape;
+    mkl_output_mkl_shape.SetMklTensor(false);
+    mkl_output_mkl_shape.SetDimensions(4);
+    AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
+                              input.shape(), mkl_output_mkl_shape);
+    CHECK_NOTNULL(output_dnn_data);
+
+    Tensor* workspace_tensor = nullptr;
+    MklDnnShape workspace_mkl_shape;
+    workspace_mkl_shape.SetMklTensor(false);
+    TensorShape workspace_tf_shape;
+    workspace_tf_shape.AddDim(0);
+    AllocateOutputSetMklShape(context, kIdxWorkspace, &workspace_tensor,
+                              workspace_tf_shape, workspace_mkl_shape);
+    CHECK_NOTNULL(workspace_tensor);
+
+    auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+    Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+    auto tmp = in_shaped.square().contract(multiplier, dims) * alpha_ + bias_;
+    if (beta_ == T(1)) {
+      out_shaped.device(context->eigen_cpu_device()) =
+          in_shaped * tmp.inverse();
+    } else if (beta_ == T(0.5)) {
+      out_shaped.device(context->eigen_cpu_device()) = in_shaped * tmp.rsqrt();
+    } else {
+      out_shaped.device(context->eigen_cpu_device()) =
+          in_shaped * (tmp.log() * -beta_).exp();
+    }
+  }
 
-    void AllocateWorkspaceTensor(OpKernelContext* context,
-        const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
-        MklDnnData<uint8>* dnn_data_wksp) {
-      CHECK_NOTNULL(dnn_data_wksp);
-      Tensor* workspace_tensor = nullptr;
-      memory::primitive_desc workspace_pd
-          = lrn_fwd_prim_desc.workspace_primitive_desc();
-      size_t workspace_bytes = workspace_pd.get_size();
-      MklDnnShape workspace_mkl_shape;
-      // the workspace tensor is a uint8 tensor that has
-      // exactly the number of bytes necessary
-      workspace_mkl_shape.SetMklTensor(false);
-      TensorShape workspace_tf_shape;
-      workspace_tf_shape.AddDim(workspace_bytes);
-      AllocateOutputSetMklShape(context, kIdxWorkspace,
-                                &workspace_tensor,
-                                workspace_tf_shape, workspace_mkl_shape);
-      CHECK_NOTNULL(workspace_tensor);
-      dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
-    }
+  void AllocateWorkspaceTensor(
+      OpKernelContext* context,
+      const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+      MklDnnData<uint8>* dnn_data_wksp) {
+    CHECK_NOTNULL(dnn_data_wksp);
+    Tensor* workspace_tensor = nullptr;
+    memory::primitive_desc workspace_pd =
+        lrn_fwd_prim_desc.workspace_primitive_desc();
+    size_t workspace_bytes = workspace_pd.get_size();
+    MklDnnShape workspace_mkl_shape;
+    // the workspace tensor is a uint8 tensor that has
+    // exactly the number of bytes necessary
+    workspace_mkl_shape.SetMklTensor(false);
+    TensorShape workspace_tf_shape;
+    workspace_tf_shape.AddDim(workspace_bytes);
+    AllocateOutputSetMklShape(context, kIdxWorkspace, &workspace_tensor,
+                              workspace_tf_shape, workspace_mkl_shape);
+    CHECK_NOTNULL(workspace_tensor);
+    dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+  }
 
   void SanityCheckInputs(OpKernelContext* context) {
     const Tensor& src_tensor = MklGetInput(context, kIdxInput);
     MklDnnShape src_dnn_shape;
     GetMklShape(context, kIdxInput, &src_dnn_shape);
     if (src_dnn_shape.IsMklTensor()) {
-      OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4,
-                errors::InvalidArgument("input must be 4-dimensional"));
-      OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
-                                           std::numeric_limits<int>::max()),
-                errors::InvalidArgument("argument to LRN too large"));
+      OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4,
+                  errors::InvalidArgument("input must be 4-dimensional"));
+      OP_REQUIRES(context,
+                  FastBoundsCheck(src_tensor.NumElements(),
+                                  std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("argument to LRN too large"));
     } else {
-      OP_REQUIRES(context, src_tensor.dims() == 4,
-                errors::InvalidArgument("input must be 4-dimensional"));
-      OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
-                                           std::numeric_limits<int>::max()),
-                errors::InvalidArgument("argument to LRN too large"));
+      OP_REQUIRES(context, src_tensor.dims() == 4,
+                  errors::InvalidArgument("input must be 4-dimensional"));
+      OP_REQUIRES(context,
+                  FastBoundsCheck(src_tensor.NumElements(),
+                                  std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("argument to LRN too large"));
     }
   }
 
-  const int kIdxInput = 0,
-            kIdxOutput = 0,
-            kIdxWorkspace = 1;
+  const int kIdxInput = 0, kIdxOutput = 0, kIdxWorkspace = 1;
 
   typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
   bool workspace_enabled_;
@@ -994,17 +986,17 @@ class MklLRNOp : public OpKernel {
   float beta_;
 };
 
-
 template <typename T>
 class MklLRNGradOp : public OpKernel {
  public:
   explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
     int64 depth_radius64;
     OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
-    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("depth_radius = ", depth_radius64,
-                                        " larger than int max"));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+        errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                " larger than int max"));
     depth_radius_ = static_cast<int>(depth_radius64);
     OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
@@ -1025,7 +1017,7 @@ class MklLRNGradOp : public OpKernel {
       MklDnnData<T> output_dnn_data(&cpu_engine);
 
       MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
-                  orig_output_dnn_shape;
+          orig_output_dnn_shape;
       GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
       GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
       GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
@@ -1037,16 +1029,16 @@ class MklLRNGradOp : public OpKernel {
           orig_input_dnn_shape.IsMklTensor() &&
           orig_output_dnn_shape.IsMklTensor() &&
           input_grad_dnn_shape.IsMklChannelDim(
-                                input_grad_dnn_shape.GetDimension() - 1) &&
+              input_grad_dnn_shape.GetDimension() - 1) &&
           orig_input_dnn_shape.IsMklChannelDim(
-                                orig_input_dnn_shape.GetDimension() - 1) &&
+              orig_input_dnn_shape.GetDimension() - 1) &&
           orig_output_dnn_shape.IsMklChannelDim(
-                                orig_output_dnn_shape.GetDimension() - 1);
+              orig_output_dnn_shape.GetDimension() - 1);
 
       if (!can_use_mkldnn) {
-          // Fallback to eigen
-          MklDefaultToEigen(context);
-          return;
+        // Fallback to eigen
+        MklDefaultToEigen(context);
+        return;
       }
       // At this point, we have the all clear to use MklDnn constructs
       // Naming: diff_dst is input_gradient_tensor; src is orig_input_tensor.
@@ -1059,13 +1051,11 @@ class MklLRNGradOp : public OpKernel {
       // NHWC format.
       memory::desc original_output_md = orig_output_dnn_shape.GetCurLayout();
       memory::desc target_diff_dst_md = ConfigureInputGradient(
-                                          input_grad_tensor,
-                                          input_grad_dnn_shape,
-                                          &input_grad_dnn_data);
+          input_grad_tensor, input_grad_dnn_shape, &input_grad_dnn_data);
 
       memory::desc orig_input_md = orig_input_dnn_shape.GetCurLayout();
       memory::dims orig_input_dims =
-                     orig_input_dnn_shape.GetSizesAsMklDnnDims();
+          orig_input_dnn_shape.GetSizesAsMklDnnDims();
       orig_input_dnn_data.SetUsrMem(orig_input_md, &orig_input_tensor);
       orig_input_dnn_data.SetOpMemDesc(orig_input_dims, memory::format::nhwc);
 
@@ -1079,27 +1069,21 @@ class MklLRNGradOp : public OpKernel {
 
       // Create LRN backward primitive descriptor. It requires LRN forward
      // primitive descriptor also.
-      auto lrn_fwd_desc = lrn_forward::desc(prop_kind::forward,
-                                            lrn_across_channels,
-                                            orig_input_md,
-                                            kernel_size,
-                                            new_alpha, beta_, bias_);
-      auto lrn_fwd_prim_desc = lrn_forward::primitive_desc(lrn_fwd_desc,
-                                                           cpu_engine);
-      auto lrn_bwd_desc = lrn_backward::desc(lrn_across_channels,
-                                             original_output_md,
-                                             target_diff_dst_md,
-                                             kernel_size,
-                                             new_alpha, beta_, bias_);
-      auto lrn_bwd_prim_desc = lrn_backward::primitive_desc(lrn_bwd_desc,
-                                                            cpu_engine,
-                                                            lrn_fwd_prim_desc);
+      auto lrn_fwd_desc = lrn_forward::desc(
+          prop_kind::forward, lrn_across_channels, orig_input_md, kernel_size,
+          new_alpha, beta_, bias_);
+      auto lrn_fwd_prim_desc =
+          lrn_forward::primitive_desc(lrn_fwd_desc, cpu_engine);
+      auto lrn_bwd_desc = lrn_backward::desc(
+          lrn_across_channels, original_output_md, target_diff_dst_md,
+          kernel_size, new_alpha, beta_, bias_);
+      auto lrn_bwd_prim_desc = lrn_backward::primitive_desc(
+          lrn_bwd_desc, cpu_engine, lrn_fwd_prim_desc);
 
       Tensor* output_tensor = nullptr;
-      memory::format orig_input_format
-          = orig_input_dnn_shape.GetTfDataFormat();
-      AllocateOutputTensor(context, lrn_bwd_prim_desc,
-                           orig_input_dims, orig_input_format, &output_tensor);
+      memory::format orig_input_format = orig_input_dnn_shape.GetTfDataFormat();
+      AllocateOutputTensor(context, lrn_bwd_prim_desc, orig_input_dims,
+                           orig_input_format, &output_tensor);
       OP_REQUIRES_OK(context, context->status());
       CHECK_NOTNULL(output_tensor);
       output_dnn_data.SetUsrMemDataHandle(output_tensor);
@@ -1110,35 +1094,32 @@ class MklLRNGradOp : public OpKernel {
       const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
       MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
       ConfigureWorkspace(workspace_tensor,
-                        lrn_fwd_prim_desc.workspace_primitive_desc(),
-                        &workspace_dnn_data);
-
-      PrepareAndExecuteNet(lrn_bwd_prim_desc,
-                           lrn_fwd_prim_desc,
-                           &orig_input_dnn_data,
-                           &input_grad_dnn_data,
-                           &output_dnn_data,
-                           memory::primitive_desc(target_diff_dst_md, cpu_engine),
-                           &workspace_dnn_data);
-    } catch (mkldnn::error &e) {
+                         lrn_fwd_prim_desc.workspace_primitive_desc(),
+                         &workspace_dnn_data);
+
+      PrepareAndExecuteNet(
+          lrn_bwd_prim_desc, lrn_fwd_prim_desc, &orig_input_dnn_data,
+          &input_grad_dnn_data, &output_dnn_data,
+          memory::primitive_desc(target_diff_dst_md, cpu_engine),
+          &workspace_dnn_data);
+    } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) +
-                         ", in file " + string(__FILE__) + ":" +
-                         std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-                     errors::Aborted("Operation received an exception:",
-                                     error_msg));
+                         ", message: " + string(e.message) + ", in file " +
+                         string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
-  void AllocateOutputTensor(OpKernelContext* context,
-      const lrn_backward::primitive_desc& lrn_bkwd_prim_desc,
-      const memory::dims output_dims_mkl_order,
-      const memory::format& output_tf_format,
-      Tensor** output_tensor) {
+  void AllocateOutputTensor(
+      OpKernelContext* context,
+      const lrn_backward::primitive_desc& lrn_bkwd_prim_desc,
+      const memory::dims output_dims_mkl_order,
+      const memory::format& output_tf_format, Tensor** output_tensor) {
     CHECK_NOTNULL(output_tensor);
-    memory::primitive_desc dst_pd
-        = lrn_bkwd_prim_desc.diff_src_primitive_desc();
+    memory::primitive_desc dst_pd =
+        lrn_bkwd_prim_desc.diff_src_primitive_desc();
 
     MklDnnShape output_mkl_shape;
     // We assume that all outputs at this point are MKL Tensors
@@ -1146,170 +1127,153 @@ class MklLRNGradOp : public OpKernel {
     output_mkl_shape.SetMklLayout(&dst_pd);
     output_mkl_shape.SetElemType(MklDnnType<T>());
     output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
-                                 output_dims_mkl_order,
-                                 output_tf_format);
+                                 output_dims_mkl_order, output_tf_format);
     TensorShape output_tf_shape;
     size_t num_bytes = dst_pd.get_size();
     CHECK_EQ(num_bytes % sizeof(T), 0);
     output_tf_shape.AddDim(num_bytes / sizeof(T));
-    AllocateOutputSetMklShape(context, kIdxOutput,
-                              output_tensor,
-                              output_tf_shape, output_mkl_shape);
+    AllocateOutputSetMklShape(context, kIdxOutput, output_tensor,
+                              output_tf_shape, output_mkl_shape);
   }
 
   memory::desc ConfigureInputGradient(const Tensor& input_grad_tensor,
-                            const MklDnnShape& input_grad_dnn_shape,
-                            MklDnnData<T> *input_grad_dnn_data) {
+                                      const MklDnnShape& input_grad_dnn_shape,
+                                      MklDnnData<T>* input_grad_dnn_data) {
     CHECK_NOTNULL(input_grad_dnn_data);
     // This shouldn't be necessary at this point, but just in case
     CHECK_EQ(input_grad_dnn_shape.IsMklTensor(), true);
 
     memory::desc input_grad_md = input_grad_dnn_shape.GetCurLayout();
-    memory::dims orig_input_dims =
-      input_grad_dnn_shape.GetSizesAsMklDnnDims();
+    memory::dims orig_input_dims = input_grad_dnn_shape.GetSizesAsMklDnnDims();
    input_grad_dnn_data->SetUsrMem(input_grad_md, &input_grad_tensor);
     input_grad_dnn_data->SetOpMemDesc(orig_input_dims, memory::format::nhwc);
     return input_grad_md;
   }
 
   void PrepareAndExecuteNet(
-        const lrn_backward::primitive_desc& lrn_bkwd_desc,
-        const lrn_forward::primitive_desc& lrn_fwd_desc,
-        MklDnnData<T>* src_dnn_data,
-        MklDnnData<T>* input_gradient_diff_dst,
-        MklDnnData<T>* output_diff_src,
-        const memory::primitive_desc& target_diff_dst_pd,
-        const MklDnnData<uint8>* workspace_dnn_data = nullptr) {
+      const lrn_backward::primitive_desc& lrn_bkwd_desc,
+      const lrn_forward::primitive_desc& lrn_fwd_desc,
+      MklDnnData<T>* src_dnn_data, MklDnnData<T>* input_gradient_diff_dst,
+      MklDnnData<T>* output_diff_src,
+      const memory::primitive_desc& target_diff_dst_pd,
+      const MklDnnData<uint8>* workspace_dnn_data = nullptr) {
     std::vector<primitive> net;
 
     // Check for input reordering on the diff dst input
     input_gradient_diff_dst->CheckReorderToOpMem(
-                    lrn_bkwd_desc.diff_dst_primitive_desc(), &net);
+        lrn_bkwd_desc.diff_dst_primitive_desc(), &net);
 
     // Check for input reordering on the original input
-    src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(),
-                                      &net);
+    src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(), &net);
 
     // Create pooling primitive and add it to net
     if (nullptr == workspace_dnn_data) {
-      net.push_back(lrn_backward(lrn_bkwd_desc,
-                                 src_dnn_data->GetOpMem(),
-                                 input_gradient_diff_dst->GetOpMem(),
-                                 output_diff_src->GetOpMem()));
+      net.push_back(lrn_backward(lrn_bkwd_desc, src_dnn_data->GetOpMem(),
+                                 input_gradient_diff_dst->GetOpMem(),
+                                 output_diff_src->GetOpMem()));
    } else {
-      net.push_back(lrn_backward(lrn_bkwd_desc,
-                                 src_dnn_data->GetOpMem(),
-                                 input_gradient_diff_dst->GetOpMem(),
-                                 workspace_dnn_data->GetOpMem(),
-                                 output_diff_src->GetOpMem()));
+      net.push_back(lrn_backward(lrn_bkwd_desc, src_dnn_data->GetOpMem(),
+                                 input_gradient_diff_dst->GetOpMem(),
+                                 workspace_dnn_data->GetOpMem(),
+                                 output_diff_src->GetOpMem()));
     }
     stream(stream::kind::eager).submit(net).wait();
   }
 
   void ConfigureWorkspace(const Tensor& workspace_tensor,
-                    memory::primitive_desc workspace_pd,
-                    MklDnnData<uint8> *workspace_dnn_data) {
+                          memory::primitive_desc workspace_pd,
+                          MklDnnData<uint8>* workspace_dnn_data) {
     CHECK_NOTNULL(workspace_dnn_data);
 
     workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
   }
 
-    // Fallback implementation - Taken from lrn_op.cc
-    // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a
-    // copy.
-    void MklDefaultToEigen(OpKernelContext* context) {
-      Tensor input_gradient_tensor;
-      Tensor orig_input_tensor;
-      Tensor orig_output_tensor;
-
-      MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
-                  orig_output_dnn_shape;
-      GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
-      GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
-      GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
-
-      if (input_grad_dnn_shape.IsMklTensor()) {
-        input_gradient_tensor =
-            ConvertMklToTF<T>(context,
-                              MklGetInput(context, kIdxGradient),
-                              input_grad_dnn_shape);
-      } else {
-        input_gradient_tensor = MklGetInput(context, kIdxGradient);
-      }
-
-      if (orig_input_dnn_shape.IsMklTensor()) {
-        orig_input_tensor =
-            ConvertMklToTF<T>(context,
-                              MklGetInput(context, kIdxOrigInput),
-                              orig_input_dnn_shape);
-      } else {
-        orig_input_tensor = MklGetInput(context, kIdxOrigInput);
-      }
-
-      if (orig_output_dnn_shape.IsMklTensor()) {
-        orig_output_tensor =
-            ConvertMklToTF<T>(context,
-                              MklGetInput(context, kIdxOrigOutput),
-                              orig_output_dnn_shape);
-      } else {
-        orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
-      }
-
-      const int64 batch = static_cast<int64>(input_gradient_tensor.dim_size(0));
-      const int64 rows = static_cast<int64>(input_gradient_tensor.dim_size(1));
-      const int64 cols = static_cast<int64>(input_gradient_tensor.dim_size(2));
-      const int64 depth = static_cast<int64>(input_gradient_tensor.dim_size(3));
-      const auto nodes = cols * rows;
-
-      auto grads_shaped =
-          input_gradient_tensor.shaped<T, 2>({nodes * batch, depth});
-
-      auto in_shaped = orig_input_tensor.shaped<T, 2>({nodes * batch, depth});
-      auto activations =
-          orig_output_tensor.shaped<T, 2>({nodes * batch, depth});
-
-      Tensor* output_dnn_data;
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      mkl_output_mkl_shape.SetDimensions(4);
-      AllocateOutputSetMklShape(context, kIdxOutput,
-                                &output_dnn_data,
-                                input_gradient_tensor.shape(),
-                                mkl_output_mkl_shape);
-
-      auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
-      out_shaped.setZero();
-      auto shard = [this, activations, in_shaped, grads_shaped, out_shaped,
-                    depth](int64 begin, int64 end) {
-        for (int64 i = begin; i < end; ++i) {
-          for (int64 j = 0; j < depth; ++j) {
-            int64 depth_begin = std::max<int64>(0, j - depth_radius_);
-            int64 depth_end = std::min<int64>(depth, j + depth_radius_ + 1);
-
-            T norm(0);
-            for (int64 k = depth_begin; k < depth_end; ++k) {
-              norm += in_shaped(i, k) * in_shaped(i, k);
-            }
-            norm = alpha_ * norm + bias_;
-            DCHECK_GT(norm, T(1e-6));
-            for (int64 k = depth_begin; k < depth_end; ++k) {
-              T dyi = T(-2) * alpha_ * beta_ * in_shaped(i, k) *
-                      activations(i, j) / norm;
-              if (k == j) {
-                dyi += Eigen::numext::pow(norm, -beta_);
-              }
-              dyi *= grads_shaped(i, j);
-              const_cast<typename TTypes<T, 2>::Tensor&>(out_shaped)(i, k) +=
-                  dyi;
-            }
-          }
-        }
-      };
-      auto worker_threads =
-          *(context->device()->tensorflow_cpu_worker_threads());
-      Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
-            depth * depth, shard);
-    }
+  // Fallback implementation - Taken from lrn_op.cc
+  // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a
+  // copy.
+  void MklDefaultToEigen(OpKernelContext* context) {
+    Tensor input_gradient_tensor;
+    Tensor orig_input_tensor;
+    Tensor orig_output_tensor;
+
+    MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
+        orig_output_dnn_shape;
+    GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
+    GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
+    GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
+
+    if (input_grad_dnn_shape.IsMklTensor()) {
+      input_gradient_tensor = ConvertMklToTF<T>(
+          context, MklGetInput(context, kIdxGradient), input_grad_dnn_shape);
+    } else {
+      input_gradient_tensor = MklGetInput(context, kIdxGradient);
+    }
+
+    if (orig_input_dnn_shape.IsMklTensor()) {
+      orig_input_tensor = ConvertMklToTF<T>(
+          context, MklGetInput(context, kIdxOrigInput), orig_input_dnn_shape);
+    } else {
+      orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+    }
+
+    if (orig_output_dnn_shape.IsMklTensor()) {
+      orig_output_tensor = ConvertMklToTF<T>(
+          context, MklGetInput(context, kIdxOrigOutput), orig_output_dnn_shape);
+    } else {
+      orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+    }
+
+    const int64 batch = static_cast<int64>(input_gradient_tensor.dim_size(0));
+    const int64 rows = static_cast<int64>(input_gradient_tensor.dim_size(1));
+    const int64 cols = static_cast<int64>(input_gradient_tensor.dim_size(2));
+    const int64 depth = static_cast<int64>(input_gradient_tensor.dim_size(3));
+    const auto nodes = cols * rows;
+
+    auto grads_shaped =
+        input_gradient_tensor.shaped<T, 2>({nodes * batch, depth});
+
+    auto in_shaped = orig_input_tensor.shaped<T, 2>({nodes * batch, depth});
+    auto activations = orig_output_tensor.shaped<T, 2>({nodes * batch, depth});
+
+    Tensor* output_dnn_data;
+    MklShape mkl_output_mkl_shape;
+    mkl_output_mkl_shape.SetMklTensor(false);
+    mkl_output_mkl_shape.SetDimensions(4);
+    AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
+                              input_gradient_tensor.shape(),
+                              mkl_output_mkl_shape);
+
+    auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+    out_shaped.setZero();
+    auto shard = [this, activations, in_shaped, grads_shaped, out_shaped,
+                  depth](int64 begin, int64 end) {
+      for (int64 i = begin; i < end; ++i) {
+        for (int64 j = 0; j < depth; ++j) {
+          int64 depth_begin = std::max<int64>(0, j - depth_radius_);
+          int64 depth_end = std::min<int64>(depth, j + depth_radius_ + 1);
+
+          T norm(0);
+          for (int64 k = depth_begin; k < depth_end; ++k) {
+            norm += in_shaped(i, k) * in_shaped(i, k);
+          }
+          norm = alpha_ * norm + bias_;
+          DCHECK_GT(norm, T(1e-6));
+          for (int64 k = depth_begin; k < depth_end; ++k) {
+            T dyi = T(-2) * alpha_ * beta_ * in_shaped(i, k) *
+                    activations(i, j) / norm;
+            if (k == j) {
+              dyi += Eigen::numext::pow(norm, -beta_);
+            }
+            dyi *= grads_shaped(i, j);
+            const_cast<typename TTypes<T, 2>::Tensor&>(out_shaped)(i, k) += dyi;
+          }
+        }
+      }
+    };
+    auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
+    Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
+          depth * depth, shard);
+  }
 
   void SanityCheckInputs(OpKernelContext* context) {
     const Tensor& input_gradient_tensor = MklGetInput(context, kIdxGradient);
@@ -1317,59 +1281,59 @@ class MklLRNGradOp : public OpKernel {
     const Tensor& orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
     const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
     MklDnnShape in_grads_dnn_shape, in_image_dnn_shape, out_image_dnn_shape,
-                workspace_dnn_shape;
+        workspace_dnn_shape;
     GetMklShape(context, kIdxGradient, &in_grads_dnn_shape);
     GetMklShape(context, kIdxOrigInput, &in_image_dnn_shape);
     GetMklShape(context, kIdxOrigOutput, &out_image_dnn_shape);
     GetMklShape(context, kIdxWorkspace, &workspace_dnn_shape);
     if (in_grads_dnn_shape.IsMklTensor()) {
       OP_REQUIRES(context, in_grads_dnn_shape.GetDimension() == 4,
-                errors::InvalidArgument("Input gradient must be "
-                                        "4-dimensional"));
+                  errors::InvalidArgument("Input gradient must be "
+                                          "4-dimensional"));
     } else {
-      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
-          errors::InvalidArgument("input gradient must be 4-dimensional"));
+      OP_REQUIRES(
+          context, input_gradient_tensor.dims() == 4,
+          errors::InvalidArgument("input gradient must be 4-dimensional"));
     }
 
     if (in_image_dnn_shape.IsMklTensor()) {
       OP_REQUIRES(context, in_image_dnn_shape.GetDimension() == 4,
-                errors::InvalidArgument("input images must be "
-                                        "4-dimensional"));
+                  errors::InvalidArgument("input images must be "
+                                          "4-dimensional"));
     } else {
       OP_REQUIRES(context, orig_input_tensor.dims() == 4,
                   errors::InvalidArgument("input images must be "
-                                        "4-dimensional"));
+                                          "4-dimensional"));
     }
 
     if (out_image_dnn_shape.IsMklTensor()) {
       OP_REQUIRES(context, out_image_dnn_shape.GetDimension() == 4,
-                errors::InvalidArgument("Output image must be "
-                                        "4-dimensional"));
+                  errors::InvalidArgument("Output image must be "
+                                          "4-dimensional"));
     } else {
-      OP_REQUIRES(context, orig_output_tensor.dims() == 4,
-          errors::InvalidArgument("Output image must be 4-dimensional"));
+      OP_REQUIRES(
+          context, orig_output_tensor.dims() == 4,
+          errors::InvalidArgument("Output image must be 4-dimensional"));
     }
 
     if (workspace_enabled_) {
       if (workspace_dnn_shape.IsMklTensor()) {
-        OP_REQUIRES(context, workspace_dnn_shape.IsMklTensor() == false,
-            errors::InvalidArgument("Workspace should not be MKL Tensor."));
+        OP_REQUIRES(
+            context, workspace_dnn_shape.IsMklTensor() == false,
+            errors::InvalidArgument("Workspace should not be MKL Tensor."));
      } else {
         OP_REQUIRES(context, workspace_tensor.dims() == 1,
-            errors::InvalidArgument("Workspace must be 1-dimensional"));
+                    errors::InvalidArgument("Workspace must be 1-dimensional"));
      }
    }
  }
 
-// Input("input_grads: T")
-// Input("input_image: T")
-// Input("output_image: T")
-// Input("workspace: uint8")
-  const int kIdxGradient = 0,
-            kIdxOrigInput = 1,
-            kIdxOrigOutput = 2,
-            kIdxWorkspace = 3,
-            kIdxOutput = 0;
+  // Input("input_grads: T")
+  // Input("input_image: T")
+  // Input("output_image: T")
+  // Input("workspace: uint8")
+  const int kIdxGradient = 0, kIdxOrigInput = 1, kIdxOrigOutput = 2,
+            kIdxWorkspace = 3, kIdxOutput = 0;
 
   typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
   bool workspace_enabled_;
@@ -1393,7 +1357,6 @@ class MklLRNGradOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklLRNGradOp<T>);
 
-
 TF_CALL_float(REGISTER_MKL_LRN_CPU);
 
 }  // namespace tensorflow
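For readers tracing the fallback paths above: both MklDefaultToEigen implementations compute standard across-channel LRN. As a sketch of the math they implement (notation mine, not part of the commit), with n = depth_radius_, k = bias_, alpha = alpha_, beta = beta_, and C = depth:

    % Forward pass (the band-matrix contraction in MklLRNOp::MklDefaultToEigen):
    % each channel value a_c is scaled by a windowed sum of squares N_c.
    N_c = k + \alpha \sum_{c'=\max(0,\,c-n)}^{\min(C-1,\,c+n)} a_{c'}^2,
    \qquad b_c = a_c \, N_c^{-\beta}

    % Backward pass (the shard lambda in MklLRNGradOp::MklDefaultToEigen),
    % where g_c is the incoming gradient for output channel c:
    \frac{\partial E}{\partial a_{c'}}
        = \sum_{c \,:\, |c - c'| \le n}
          g_c \left( \delta_{c c'} \, N_c^{-\beta}
                     - 2 \alpha \beta \, a_{c'} a_c \, N_c^{-\beta-1} \right)

The delta term corresponds to the k == j branch that adds Eigen::numext::pow(norm, -beta_), and the -2*alpha*beta term matches T(-2) * alpha_ * beta_ * in_shaped(i, k) * activations(i, j) / norm, since activations(i, j) equals a_c N_c^{-beta}.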