path: root/tensorflow/core/kernels/mkl_lrn_op.cc
author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-01-26 11:59:56 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2018-01-26 12:03:52 -0800
commit    0f65c8f572201f8838189f3e3c3e455759112c14 (patch)
tree      55d3ad38ebe720d5c3edb8d0722b55d4cd778e3e /tensorflow/core/kernels/mkl_lrn_op.cc
parent    ff6463f4277f412b98d6e6bb1283841ff66902de (diff)
Cleanup: Ran clang-format on all *.{cc,h} files in tensorflow/core/kernels.
PiperOrigin-RevId: 183423961
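
For reference, a formatting pass like the one described in the commit message can be reproduced with a small helper script. The sketch below is illustrative only: the script name, the repo-relative path, and the --style=Google choice are assumptions, and it presumes clang-format is on PATH and is run from the repository root; it is not the exact command used to produce this commit.

# reformat_kernels.py -- hypothetical helper, not part of this commit.
import pathlib
import subprocess

# Assumed repo-relative location of the kernels sources.
KERNELS_DIR = pathlib.Path("tensorflow/core/kernels")

def run_clang_format():
    # Collect every *.cc and *.h file under the kernels directory.
    sources = sorted(p for p in KERNELS_DIR.rglob("*") if p.suffix in (".cc", ".h"))
    for path in sources:
        # -i rewrites the file in place; --style=Google is assumed to match
        # TensorFlow's formatting configuration.
        subprocess.run(["clang-format", "-i", "--style=Google", str(path)], check=True)

if __name__ == "__main__":
    run_clang_format()
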
Diffstat (limited to 'tensorflow/core/kernels/mkl_lrn_op.cc')
-rw-r--r--  tensorflow/core/kernels/mkl_lrn_op.cc  647
1 file changed, 305 insertions(+), 342 deletions(-)
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index 95e0404ba8..a8b45004b7 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -22,6 +22,9 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include <vector>
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
@@ -30,9 +33,6 @@ limitations under the License.
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/util/mkl_util.h"
#include "tensorflow/core/util/tensor_format.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
#if !defined(IS_MOBILE_PLATFORM)
#include "tensorflow/core/util/work_sharder.h"
@@ -40,10 +40,10 @@ limitations under the License.
#ifdef INTEL_MKL_DNN
#include "mkldnn.hpp"
-using mkldnn::lrn_forward;
+using mkldnn::lrn_across_channels;
using mkldnn::lrn_backward;
+using mkldnn::lrn_forward;
using mkldnn::prop_kind;
-using mkldnn::lrn_across_channels;
using mkldnn::stream;
#endif
@@ -77,10 +77,11 @@ class MklLRNOp : public OpKernel {
explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
- OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("depth_radius = ", depth_radius64,
- " larger than int max"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+ errors::InvalidArgument("depth_radius = ", depth_radius64,
+ " larger than int max"));
depth_radius_ = static_cast<size_t>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
@@ -103,9 +104,10 @@ class MklLRNOp : public OpKernel {
: input.dims();
OP_REQUIRES(context, mkl_context.in_dims == 4,
errors::InvalidArgument("input must be 4-dimensional"));
- OP_REQUIRES(context, FastBoundsCheck(input.NumElements(),
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("argument to LRN too large"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(input.NumElements(), std::numeric_limits<int>::max()),
+ errors::InvalidArgument("argument to LRN too large"));
if (!input_in_mkl_format) {
mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_,
@@ -339,17 +341,17 @@ class MklLRNOp : public OpKernel {
float beta_;
};
-
template <typename T>
class MklLRNGradOp : public OpKernel {
public:
explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
- OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("depth_radius = ", depth_radius64,
- " larger than int max"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+ errors::InvalidArgument("depth_radius = ", depth_radius64,
+ " larger than int max"));
depth_radius_ = static_cast<int>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
@@ -740,10 +742,11 @@ class MklLRNOp : public OpKernel {
explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
- OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("depth_radius = ", depth_radius64,
- " larger than int max"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+ errors::InvalidArgument("depth_radius = ", depth_radius64,
+ " larger than int max"));
depth_radius_ = static_cast<size_t>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
@@ -773,10 +776,10 @@ class MklLRNOp : public OpKernel {
if (!src_dnn_shape.IsMklTensor()) {
MklDefaultToEigen(context, src_tensor);
return;
- } else if (!src_dnn_shape.IsMklChannelDim(
- src_dnn_shape.GetDimension() - 1) ) {
+ } else if (!src_dnn_shape.IsMklChannelDim(src_dnn_shape.GetDimension() -
+ 1)) {
Tensor converted_tensor =
- ConvertMklToTF<T>(context, src_tensor, src_dnn_shape);
+ ConvertMklToTF<T>(context, src_tensor, src_dnn_shape);
MklDefaultToEigen(context, converted_tensor);
return;
}
@@ -807,18 +810,16 @@ class MklLRNOp : public OpKernel {
// Create LRN primitive descriptor.
// Tensorflow's normalization semantics is across channels.
// MKL-DNN also supports normalization within channel.
- auto lrn_desc = lrn_forward::desc(prop_kind::forward,
- lrn_across_channels,
+ auto lrn_desc = lrn_forward::desc(prop_kind::forward, lrn_across_channels,
src_dnn_data.GetUsrMemDesc(),
- kernel_size,
- new_alpha, beta_, bias_);
+ kernel_size, new_alpha, beta_, bias_);
auto lrn_prim_desc = lrn_forward::primitive_desc(lrn_desc, cpu_engine);
// Allocate output_dnn_data tensor.
Tensor* output_tensor = nullptr;
memory::format input_format = src_dnn_shape.GetTfDataFormat();
- AllocateOutputTensor(context, lrn_prim_desc, input_dims,
- input_format, &output_tensor);
+ AllocateOutputTensor(context, lrn_prim_desc, input_dims, input_format,
+ &output_tensor);
OP_REQUIRES_OK(context, context->status());
CHECK_NOTNULL(output_tensor);
dst_dnn_data.SetUsrMemDataHandle(output_tensor);
@@ -827,25 +828,23 @@ class MklLRNOp : public OpKernel {
AllocateWorkspaceTensor(context, lrn_prim_desc, &workspace_dnn_data);
OP_REQUIRES_OK(context, context->status());
- PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data,
- &dst_dnn_data, &workspace_dnn_data);
- } catch (mkldnn::error &e) {
+ PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data, &dst_dnn_data,
+ &workspace_dnn_data);
+ } catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
- ", message: " + string(e.message) +
- ", in file " + string(__FILE__) + ":" +
- std::to_string(__LINE__);
- OP_REQUIRES_OK(context,
- errors::Aborted("Operation received an exception:",
- error_msg));
+ ", message: " + string(e.message) + ", in file " +
+ string(__FILE__) + ":" + std::to_string(__LINE__);
+ OP_REQUIRES_OK(
+ context,
+ errors::Aborted("Operation received an exception:", error_msg));
}
}
private:
- void PrepareAndExecuteNet(
- const lrn_forward::primitive_desc& lrn_fwd_desc,
- MklDnnData<T>* src_dnn_data,
- MklDnnData<T>* dst_dnn_data,
- MklDnnData<uint8>* wksp_dnn_data = nullptr) {
+ void PrepareAndExecuteNet(const lrn_forward::primitive_desc& lrn_fwd_desc,
+ MklDnnData<T>* src_dnn_data,
+ MklDnnData<T>* dst_dnn_data,
+ MklDnnData<uint8>* wksp_dnn_data = nullptr) {
std::vector<primitive> net;
// Check for input reorder
@@ -853,23 +852,21 @@ class MklLRNOp : public OpKernel {
// Create pooling primitive and add it to net
if (wksp_dnn_data != nullptr) {
- net.push_back(lrn_forward(lrn_fwd_desc,
- src_dnn_data->GetOpMem(),
- wksp_dnn_data->GetOpMem(),
- dst_dnn_data->GetOpMem()));
+ net.push_back(lrn_forward(lrn_fwd_desc, src_dnn_data->GetOpMem(),
+ wksp_dnn_data->GetOpMem(),
+ dst_dnn_data->GetOpMem()));
} else {
- net.push_back(lrn_forward(lrn_fwd_desc,
- src_dnn_data->GetOpMem(),
- dst_dnn_data->GetOpMem()));
+ net.push_back(lrn_forward(lrn_fwd_desc, src_dnn_data->GetOpMem(),
+ dst_dnn_data->GetOpMem()));
}
stream(stream::kind::eager).submit(net).wait();
}
- void AllocateOutputTensor(OpKernelContext* context,
- const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
- const memory::dims output_dims_mkl_order,
- const memory::format& output_tf_format,
- Tensor** output_tensor) {
+ void AllocateOutputTensor(
+ OpKernelContext* context,
+ const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+ const memory::dims output_dims_mkl_order,
+ const memory::format& output_tf_format, Tensor** output_tensor) {
CHECK_NOTNULL(output_tensor);
memory::primitive_desc dst_pd = lrn_fwd_prim_desc.dst_primitive_desc();
@@ -880,111 +877,106 @@ class MklLRNOp : public OpKernel {
output_mkl_shape.SetMklLayout(&dst_pd);
output_mkl_shape.SetElemType(MklDnnType<T>());
output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
- output_dims_mkl_order,
- output_tf_format);
+ output_dims_mkl_order, output_tf_format);
TensorShape output_tf_shape;
// only allocate enough space for the elements we need.
size_t num_bytes = dst_pd.get_size();
CHECK_EQ(num_bytes % sizeof(T), 0);
output_tf_shape.AddDim(num_bytes / sizeof(T));
- AllocateOutputSetMklShape(context, kIdxOutput,
- output_tensor,
- output_tf_shape, output_mkl_shape);
- }
-
- // Fallback implementation - Taken from lrn_op.cc
- // TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a
- // copy.
- void MklDefaultToEigen(OpKernelContext* context,
- const Tensor& input) {
- const int batch = static_cast<int>(input.dim_size(0));
- const int rows = static_cast<int>(input.dim_size(1));
- const int cols = static_cast<int>(input.dim_size(2));
- const int depth = static_cast<int>(input.dim_size(3));
- const int nodes = cols * rows;
-
- auto in_shaped = input.shaped<T, 2>({nodes * batch, depth});
- // Multiplying the input with the band matrix has the effect of reducing
- // the
- // correct patch along the depth.
- Eigen::Tensor<T, 2, Eigen::RowMajor> multiplier(depth, depth);
- GetBandMatrix<T>(depth, depth_radius_, &multiplier);
+ AllocateOutputSetMklShape(context, kIdxOutput, output_tensor,
+ output_tf_shape, output_mkl_shape);
+ }
- Tensor *output_dnn_data = nullptr;
- MklDnnShape mkl_output_mkl_shape;
- mkl_output_mkl_shape.SetMklTensor(false);
- mkl_output_mkl_shape.SetDimensions(4);
- AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
- input.shape(), mkl_output_mkl_shape);
- CHECK_NOTNULL(output_dnn_data);
-
- Tensor* workspace_tensor = nullptr;
- MklDnnShape workspace_mkl_shape;
- workspace_mkl_shape.SetMklTensor(false);
- TensorShape workspace_tf_shape;
- workspace_tf_shape.AddDim(0);
- AllocateOutputSetMklShape(context, kIdxWorkspace,
- &workspace_tensor,
+ // Fallback implementation - Taken from lrn_op.cc
+ // TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a
+ // copy.
+ void MklDefaultToEigen(OpKernelContext* context, const Tensor& input) {
+ const int batch = static_cast<int>(input.dim_size(0));
+ const int rows = static_cast<int>(input.dim_size(1));
+ const int cols = static_cast<int>(input.dim_size(2));
+ const int depth = static_cast<int>(input.dim_size(3));
+ const int nodes = cols * rows;
+
+ auto in_shaped = input.shaped<T, 2>({nodes * batch, depth});
+ // Multiplying the input with the band matrix has the effect of reducing
+ // the
+ // correct patch along the depth.
+ Eigen::Tensor<T, 2, Eigen::RowMajor> multiplier(depth, depth);
+ GetBandMatrix<T>(depth, depth_radius_, &multiplier);
+
+ Tensor* output_dnn_data = nullptr;
+ MklDnnShape mkl_output_mkl_shape;
+ mkl_output_mkl_shape.SetMklTensor(false);
+ mkl_output_mkl_shape.SetDimensions(4);
+ AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
+ input.shape(), mkl_output_mkl_shape);
+ CHECK_NOTNULL(output_dnn_data);
+
+ Tensor* workspace_tensor = nullptr;
+ MklDnnShape workspace_mkl_shape;
+ workspace_mkl_shape.SetMklTensor(false);
+ TensorShape workspace_tf_shape;
+ workspace_tf_shape.AddDim(0);
+ AllocateOutputSetMklShape(context, kIdxWorkspace, &workspace_tensor,
workspace_tf_shape, workspace_mkl_shape);
- CHECK_NOTNULL(workspace_tensor);
-
- auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
- Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
- auto tmp = in_shaped.square().contract(multiplier, dims) * alpha_ + bias_;
- if (beta_ == T(1)) {
- out_shaped.device(context->eigen_cpu_device()) =
- in_shaped * tmp.inverse();
- } else if (beta_ == T(0.5)) {
- out_shaped.device(context->eigen_cpu_device()) =
- in_shaped * tmp.rsqrt();
- } else {
- out_shaped.device(context->eigen_cpu_device()) =
- in_shaped * (tmp.log() * -beta_).exp();
- }
+ CHECK_NOTNULL(workspace_tensor);
+
+ auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+ auto tmp = in_shaped.square().contract(multiplier, dims) * alpha_ + bias_;
+ if (beta_ == T(1)) {
+ out_shaped.device(context->eigen_cpu_device()) =
+ in_shaped * tmp.inverse();
+ } else if (beta_ == T(0.5)) {
+ out_shaped.device(context->eigen_cpu_device()) = in_shaped * tmp.rsqrt();
+ } else {
+ out_shaped.device(context->eigen_cpu_device()) =
+ in_shaped * (tmp.log() * -beta_).exp();
}
+ }
- void AllocateWorkspaceTensor(OpKernelContext* context,
- const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
- MklDnnData<uint8>* dnn_data_wksp) {
- CHECK_NOTNULL(dnn_data_wksp);
- Tensor* workspace_tensor = nullptr;
- memory::primitive_desc workspace_pd
- = lrn_fwd_prim_desc.workspace_primitive_desc();
- size_t workspace_bytes = workspace_pd.get_size();
- MklDnnShape workspace_mkl_shape;
- // the workspace tensor is a uint8 tensor that has
- // exactly the number of bytes necessary
- workspace_mkl_shape.SetMklTensor(false);
- TensorShape workspace_tf_shape;
- workspace_tf_shape.AddDim(workspace_bytes);
- AllocateOutputSetMklShape(context, kIdxWorkspace,
- &workspace_tensor,
+ void AllocateWorkspaceTensor(
+ OpKernelContext* context,
+ const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+ MklDnnData<uint8>* dnn_data_wksp) {
+ CHECK_NOTNULL(dnn_data_wksp);
+ Tensor* workspace_tensor = nullptr;
+ memory::primitive_desc workspace_pd =
+ lrn_fwd_prim_desc.workspace_primitive_desc();
+ size_t workspace_bytes = workspace_pd.get_size();
+ MklDnnShape workspace_mkl_shape;
+ // the workspace tensor is a uint8 tensor that has
+ // exactly the number of bytes necessary
+ workspace_mkl_shape.SetMklTensor(false);
+ TensorShape workspace_tf_shape;
+ workspace_tf_shape.AddDim(workspace_bytes);
+ AllocateOutputSetMklShape(context, kIdxWorkspace, &workspace_tensor,
workspace_tf_shape, workspace_mkl_shape);
- CHECK_NOTNULL(workspace_tensor);
- dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
- }
+ CHECK_NOTNULL(workspace_tensor);
+ dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+ }
void SanityCheckInputs(OpKernelContext* context) {
const Tensor& src_tensor = MklGetInput(context, kIdxInput);
MklDnnShape src_dnn_shape;
GetMklShape(context, kIdxInput, &src_dnn_shape);
if (src_dnn_shape.IsMklTensor()) {
- OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4,
- errors::InvalidArgument("input must be 4-dimensional"));
- OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("argument to LRN too large"));
+ OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4,
+ errors::InvalidArgument("input must be 4-dimensional"));
+ OP_REQUIRES(context,
+ FastBoundsCheck(src_tensor.NumElements(),
+ std::numeric_limits<int>::max()),
+ errors::InvalidArgument("argument to LRN too large"));
} else {
- OP_REQUIRES(context, src_tensor.dims() == 4,
- errors::InvalidArgument("input must be 4-dimensional"));
- OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("argument to LRN too large"));
+ OP_REQUIRES(context, src_tensor.dims() == 4,
+ errors::InvalidArgument("input must be 4-dimensional"));
+ OP_REQUIRES(context,
+ FastBoundsCheck(src_tensor.NumElements(),
+ std::numeric_limits<int>::max()),
+ errors::InvalidArgument("argument to LRN too large"));
}
}
- const int kIdxInput = 0,
- kIdxOutput = 0,
- kIdxWorkspace = 1;
+ const int kIdxInput = 0, kIdxOutput = 0, kIdxWorkspace = 1;
typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
bool workspace_enabled_;
@@ -994,17 +986,17 @@ class MklLRNOp : public OpKernel {
float beta_;
};
-
template <typename T>
class MklLRNGradOp : public OpKernel {
public:
explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
- OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
- std::numeric_limits<int>::max()),
- errors::InvalidArgument("depth_radius = ", depth_radius64,
- " larger than int max"));
+ OP_REQUIRES(
+ context,
+ FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
+ errors::InvalidArgument("depth_radius = ", depth_radius64,
+ " larger than int max"));
depth_radius_ = static_cast<int>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
@@ -1025,7 +1017,7 @@ class MklLRNGradOp : public OpKernel {
MklDnnData<T> output_dnn_data(&cpu_engine);
MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
- orig_output_dnn_shape;
+ orig_output_dnn_shape;
GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
@@ -1037,16 +1029,16 @@ class MklLRNGradOp : public OpKernel {
orig_input_dnn_shape.IsMklTensor() &&
orig_output_dnn_shape.IsMklTensor() &&
input_grad_dnn_shape.IsMklChannelDim(
- input_grad_dnn_shape.GetDimension() - 1) &&
+ input_grad_dnn_shape.GetDimension() - 1) &&
orig_input_dnn_shape.IsMklChannelDim(
- orig_input_dnn_shape.GetDimension() - 1) &&
+ orig_input_dnn_shape.GetDimension() - 1) &&
orig_output_dnn_shape.IsMklChannelDim(
- orig_output_dnn_shape.GetDimension() - 1);
+ orig_output_dnn_shape.GetDimension() - 1);
if (!can_use_mkldnn) {
- // Fallback to eigen
- MklDefaultToEigen(context);
- return;
+ // Fallback to eigen
+ MklDefaultToEigen(context);
+ return;
}
// At this point, we have the all clear to use MklDnn constructs
// Naming: diff_dst is input_gradient_tensor; src is orig_input_tensor.
@@ -1059,13 +1051,11 @@ class MklLRNGradOp : public OpKernel {
// NHWC format.
memory::desc original_output_md = orig_output_dnn_shape.GetCurLayout();
memory::desc target_diff_dst_md = ConfigureInputGradient(
- input_grad_tensor,
- input_grad_dnn_shape,
- &input_grad_dnn_data);
+ input_grad_tensor, input_grad_dnn_shape, &input_grad_dnn_data);
memory::desc orig_input_md = orig_input_dnn_shape.GetCurLayout();
memory::dims orig_input_dims =
- orig_input_dnn_shape.GetSizesAsMklDnnDims();
+ orig_input_dnn_shape.GetSizesAsMklDnnDims();
orig_input_dnn_data.SetUsrMem(orig_input_md, &orig_input_tensor);
orig_input_dnn_data.SetOpMemDesc(orig_input_dims, memory::format::nhwc);
@@ -1079,27 +1069,21 @@ class MklLRNGradOp : public OpKernel {
// Create LRN backward primitive descriptor. It requires LRN forward
// primitive descriptor also.
- auto lrn_fwd_desc = lrn_forward::desc(prop_kind::forward,
- lrn_across_channels,
- orig_input_md,
- kernel_size,
- new_alpha, beta_, bias_);
- auto lrn_fwd_prim_desc = lrn_forward::primitive_desc(lrn_fwd_desc,
- cpu_engine);
- auto lrn_bwd_desc = lrn_backward::desc(lrn_across_channels,
- original_output_md,
- target_diff_dst_md,
- kernel_size,
- new_alpha, beta_, bias_);
- auto lrn_bwd_prim_desc = lrn_backward::primitive_desc(lrn_bwd_desc,
- cpu_engine,
- lrn_fwd_prim_desc);
+ auto lrn_fwd_desc = lrn_forward::desc(
+ prop_kind::forward, lrn_across_channels, orig_input_md, kernel_size,
+ new_alpha, beta_, bias_);
+ auto lrn_fwd_prim_desc =
+ lrn_forward::primitive_desc(lrn_fwd_desc, cpu_engine);
+ auto lrn_bwd_desc = lrn_backward::desc(
+ lrn_across_channels, original_output_md, target_diff_dst_md,
+ kernel_size, new_alpha, beta_, bias_);
+ auto lrn_bwd_prim_desc = lrn_backward::primitive_desc(
+ lrn_bwd_desc, cpu_engine, lrn_fwd_prim_desc);
Tensor* output_tensor = nullptr;
- memory::format orig_input_format
- = orig_input_dnn_shape.GetTfDataFormat();
- AllocateOutputTensor(context, lrn_bwd_prim_desc,
- orig_input_dims, orig_input_format, &output_tensor);
+ memory::format orig_input_format = orig_input_dnn_shape.GetTfDataFormat();
+ AllocateOutputTensor(context, lrn_bwd_prim_desc, orig_input_dims,
+ orig_input_format, &output_tensor);
OP_REQUIRES_OK(context, context->status());
CHECK_NOTNULL(output_tensor);
output_dnn_data.SetUsrMemDataHandle(output_tensor);
@@ -1110,35 +1094,32 @@ class MklLRNGradOp : public OpKernel {
const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
ConfigureWorkspace(workspace_tensor,
- lrn_fwd_prim_desc.workspace_primitive_desc(),
- &workspace_dnn_data);
-
- PrepareAndExecuteNet(lrn_bwd_prim_desc,
- lrn_fwd_prim_desc,
- &orig_input_dnn_data,
- &input_grad_dnn_data,
- &output_dnn_data,
- memory::primitive_desc(target_diff_dst_md, cpu_engine),
- &workspace_dnn_data);
- } catch (mkldnn::error &e) {
+ lrn_fwd_prim_desc.workspace_primitive_desc(),
+ &workspace_dnn_data);
+
+ PrepareAndExecuteNet(
+ lrn_bwd_prim_desc, lrn_fwd_prim_desc, &orig_input_dnn_data,
+ &input_grad_dnn_data, &output_dnn_data,
+ memory::primitive_desc(target_diff_dst_md, cpu_engine),
+ &workspace_dnn_data);
+ } catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
- ", message: " + string(e.message) +
- ", in file " + string(__FILE__) + ":" +
- std::to_string(__LINE__);
- OP_REQUIRES_OK(context,
- errors::Aborted("Operation received an exception:",
- error_msg));
+ ", message: " + string(e.message) + ", in file " +
+ string(__FILE__) + ":" + std::to_string(__LINE__);
+ OP_REQUIRES_OK(
+ context,
+ errors::Aborted("Operation received an exception:", error_msg));
}
}
- void AllocateOutputTensor(OpKernelContext* context,
- const lrn_backward::primitive_desc& lrn_bkwd_prim_desc,
- const memory::dims output_dims_mkl_order,
- const memory::format& output_tf_format,
- Tensor** output_tensor) {
+ void AllocateOutputTensor(
+ OpKernelContext* context,
+ const lrn_backward::primitive_desc& lrn_bkwd_prim_desc,
+ const memory::dims output_dims_mkl_order,
+ const memory::format& output_tf_format, Tensor** output_tensor) {
CHECK_NOTNULL(output_tensor);
- memory::primitive_desc dst_pd
- = lrn_bkwd_prim_desc.diff_src_primitive_desc();
+ memory::primitive_desc dst_pd =
+ lrn_bkwd_prim_desc.diff_src_primitive_desc();
MklDnnShape output_mkl_shape;
// We assume that all outputs at this point are MKL Tensors
@@ -1146,170 +1127,153 @@ class MklLRNGradOp : public OpKernel {
output_mkl_shape.SetMklLayout(&dst_pd);
output_mkl_shape.SetElemType(MklDnnType<T>());
output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
- output_dims_mkl_order,
- output_tf_format);
+ output_dims_mkl_order, output_tf_format);
TensorShape output_tf_shape;
size_t num_bytes = dst_pd.get_size();
CHECK_EQ(num_bytes % sizeof(T), 0);
output_tf_shape.AddDim(num_bytes / sizeof(T));
- AllocateOutputSetMklShape(context, kIdxOutput,
- output_tensor,
- output_tf_shape, output_mkl_shape);
+ AllocateOutputSetMklShape(context, kIdxOutput, output_tensor,
+ output_tf_shape, output_mkl_shape);
}
memory::desc ConfigureInputGradient(const Tensor& input_grad_tensor,
- const MklDnnShape& input_grad_dnn_shape,
- MklDnnData<T> *input_grad_dnn_data) {
+ const MklDnnShape& input_grad_dnn_shape,
+ MklDnnData<T>* input_grad_dnn_data) {
CHECK_NOTNULL(input_grad_dnn_data);
// This shouldn't be necessary at this point, but just in case
CHECK_EQ(input_grad_dnn_shape.IsMklTensor(), true);
memory::desc input_grad_md = input_grad_dnn_shape.GetCurLayout();
- memory::dims orig_input_dims =
- input_grad_dnn_shape.GetSizesAsMklDnnDims();
+ memory::dims orig_input_dims = input_grad_dnn_shape.GetSizesAsMklDnnDims();
input_grad_dnn_data->SetUsrMem(input_grad_md, &input_grad_tensor);
input_grad_dnn_data->SetOpMemDesc(orig_input_dims, memory::format::nhwc);
return input_grad_md;
}
void PrepareAndExecuteNet(
- const lrn_backward::primitive_desc& lrn_bkwd_desc,
- const lrn_forward::primitive_desc& lrn_fwd_desc,
- MklDnnData<T>* src_dnn_data,
- MklDnnData<T>* input_gradient_diff_dst,
- MklDnnData<T>* output_diff_src,
- const memory::primitive_desc& target_diff_dst_pd,
- const MklDnnData<uint8>* workspace_dnn_data = nullptr) {
+ const lrn_backward::primitive_desc& lrn_bkwd_desc,
+ const lrn_forward::primitive_desc& lrn_fwd_desc,
+ MklDnnData<T>* src_dnn_data, MklDnnData<T>* input_gradient_diff_dst,
+ MklDnnData<T>* output_diff_src,
+ const memory::primitive_desc& target_diff_dst_pd,
+ const MklDnnData<uint8>* workspace_dnn_data = nullptr) {
std::vector<primitive> net;
// Check for input reordering on the diff dst input
input_gradient_diff_dst->CheckReorderToOpMem(
- lrn_bkwd_desc.diff_dst_primitive_desc(), &net);
+ lrn_bkwd_desc.diff_dst_primitive_desc(), &net);
// Check for input reordering on the original input
- src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(),
- &net);
+ src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(), &net);
// Create pooling primitive and add it to net
if (nullptr == workspace_dnn_data) {
- net.push_back(lrn_backward(lrn_bkwd_desc,
- src_dnn_data->GetOpMem(),
- input_gradient_diff_dst->GetOpMem(),
- output_diff_src->GetOpMem()));
+ net.push_back(lrn_backward(lrn_bkwd_desc, src_dnn_data->GetOpMem(),
+ input_gradient_diff_dst->GetOpMem(),
+ output_diff_src->GetOpMem()));
} else {
- net.push_back(lrn_backward(lrn_bkwd_desc,
- src_dnn_data->GetOpMem(),
- input_gradient_diff_dst->GetOpMem(),
- workspace_dnn_data->GetOpMem(),
- output_diff_src->GetOpMem()));
+ net.push_back(lrn_backward(lrn_bkwd_desc, src_dnn_data->GetOpMem(),
+ input_gradient_diff_dst->GetOpMem(),
+ workspace_dnn_data->GetOpMem(),
+ output_diff_src->GetOpMem()));
}
stream(stream::kind::eager).submit(net).wait();
}
void ConfigureWorkspace(const Tensor& workspace_tensor,
- memory::primitive_desc workspace_pd,
- MklDnnData<uint8> *workspace_dnn_data) {
+ memory::primitive_desc workspace_pd,
+ MklDnnData<uint8>* workspace_dnn_data) {
CHECK_NOTNULL(workspace_dnn_data);
workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
}
- // Fallback implementation - Taken from lrn_op.cc
- // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a
- // copy.
- void MklDefaultToEigen(OpKernelContext* context) {
- Tensor input_gradient_tensor;
- Tensor orig_input_tensor;
- Tensor orig_output_tensor;
-
- MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
- orig_output_dnn_shape;
- GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
- GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
- GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
-
- if (input_grad_dnn_shape.IsMklTensor()) {
- input_gradient_tensor =
- ConvertMklToTF<T>(context,
- MklGetInput(context, kIdxGradient),
- input_grad_dnn_shape);
- } else {
- input_gradient_tensor = MklGetInput(context, kIdxGradient);
- }
-
- if (orig_input_dnn_shape.IsMklTensor()) {
- orig_input_tensor =
- ConvertMklToTF<T>(context,
- MklGetInput(context, kIdxOrigInput),
- orig_input_dnn_shape);
- } else {
- orig_input_tensor = MklGetInput(context, kIdxOrigInput);
- }
+ // Fallback implementation - Taken from lrn_op.cc
+ // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a
+ // copy.
+ void MklDefaultToEigen(OpKernelContext* context) {
+ Tensor input_gradient_tensor;
+ Tensor orig_input_tensor;
+ Tensor orig_output_tensor;
+
+ MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
+ orig_output_dnn_shape;
+ GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
+ GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
+ GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
+
+ if (input_grad_dnn_shape.IsMklTensor()) {
+ input_gradient_tensor = ConvertMklToTF<T>(
+ context, MklGetInput(context, kIdxGradient), input_grad_dnn_shape);
+ } else {
+ input_gradient_tensor = MklGetInput(context, kIdxGradient);
+ }
- if (orig_output_dnn_shape.IsMklTensor()) {
- orig_output_tensor =
- ConvertMklToTF<T>(context,
- MklGetInput(context, kIdxOrigOutput),
- orig_output_dnn_shape);
- } else {
- orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
- }
+ if (orig_input_dnn_shape.IsMklTensor()) {
+ orig_input_tensor = ConvertMklToTF<T>(
+ context, MklGetInput(context, kIdxOrigInput), orig_input_dnn_shape);
+ } else {
+ orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+ }
- const int64 batch = static_cast<int64>(input_gradient_tensor.dim_size(0));
- const int64 rows = static_cast<int64>(input_gradient_tensor.dim_size(1));
- const int64 cols = static_cast<int64>(input_gradient_tensor.dim_size(2));
- const int64 depth = static_cast<int64>(input_gradient_tensor.dim_size(3));
- const auto nodes = cols * rows;
+ if (orig_output_dnn_shape.IsMklTensor()) {
+ orig_output_tensor = ConvertMklToTF<T>(
+ context, MklGetInput(context, kIdxOrigOutput), orig_output_dnn_shape);
+ } else {
+ orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+ }
- auto grads_shaped =
- input_gradient_tensor.shaped<T, 2>({nodes * batch, depth});
+ const int64 batch = static_cast<int64>(input_gradient_tensor.dim_size(0));
+ const int64 rows = static_cast<int64>(input_gradient_tensor.dim_size(1));
+ const int64 cols = static_cast<int64>(input_gradient_tensor.dim_size(2));
+ const int64 depth = static_cast<int64>(input_gradient_tensor.dim_size(3));
+ const auto nodes = cols * rows;
- auto in_shaped = orig_input_tensor.shaped<T, 2>({nodes * batch, depth});
- auto activations =
- orig_output_tensor.shaped<T, 2>({nodes * batch, depth});
+ auto grads_shaped =
+ input_gradient_tensor.shaped<T, 2>({nodes * batch, depth});
- Tensor* output_dnn_data;
- MklShape mkl_output_mkl_shape;
- mkl_output_mkl_shape.SetMklTensor(false);
- mkl_output_mkl_shape.SetDimensions(4);
- AllocateOutputSetMklShape(context, kIdxOutput,
- &output_dnn_data,
- input_gradient_tensor.shape(),
- mkl_output_mkl_shape);
+ auto in_shaped = orig_input_tensor.shaped<T, 2>({nodes * batch, depth});
+ auto activations = orig_output_tensor.shaped<T, 2>({nodes * batch, depth});
- auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
- out_shaped.setZero();
- auto shard = [this, activations, in_shaped, grads_shaped, out_shaped,
- depth](int64 begin, int64 end) {
- for (int64 i = begin; i < end; ++i) {
- for (int64 j = 0; j < depth; ++j) {
- int64 depth_begin = std::max<int64>(0, j - depth_radius_);
- int64 depth_end = std::min<int64>(depth, j + depth_radius_ + 1);
+ Tensor* output_dnn_data;
+ MklShape mkl_output_mkl_shape;
+ mkl_output_mkl_shape.SetMklTensor(false);
+ mkl_output_mkl_shape.SetDimensions(4);
+ AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
+ input_gradient_tensor.shape(),
+ mkl_output_mkl_shape);
- T norm(0);
- for (int64 k = depth_begin; k < depth_end; ++k) {
- norm += in_shaped(i, k) * in_shaped(i, k);
- }
- norm = alpha_ * norm + bias_;
- DCHECK_GT(norm, T(1e-6));
- for (int64 k = depth_begin; k < depth_end; ++k) {
- T dyi = T(-2) * alpha_ * beta_ * in_shaped(i, k) *
- activations(i, j) / norm;
- if (k == j) {
- dyi += Eigen::numext::pow(norm, -beta_);
- }
- dyi *= grads_shaped(i, j);
- const_cast<typename TTypes<T, 2>::Tensor&>(out_shaped)(i, k) +=
- dyi;
+ auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+ out_shaped.setZero();
+ auto shard = [this, activations, in_shaped, grads_shaped, out_shaped,
+ depth](int64 begin, int64 end) {
+ for (int64 i = begin; i < end; ++i) {
+ for (int64 j = 0; j < depth; ++j) {
+ int64 depth_begin = std::max<int64>(0, j - depth_radius_);
+ int64 depth_end = std::min<int64>(depth, j + depth_radius_ + 1);
+
+ T norm(0);
+ for (int64 k = depth_begin; k < depth_end; ++k) {
+ norm += in_shaped(i, k) * in_shaped(i, k);
+ }
+ norm = alpha_ * norm + bias_;
+ DCHECK_GT(norm, T(1e-6));
+ for (int64 k = depth_begin; k < depth_end; ++k) {
+ T dyi = T(-2) * alpha_ * beta_ * in_shaped(i, k) *
+ activations(i, j) / norm;
+ if (k == j) {
+ dyi += Eigen::numext::pow(norm, -beta_);
}
+ dyi *= grads_shaped(i, j);
+ const_cast<typename TTypes<T, 2>::Tensor&>(out_shaped)(i, k) += dyi;
}
}
- };
- auto worker_threads =
- *(context->device()->tensorflow_cpu_worker_threads());
- Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
- depth * depth, shard);
- }
+ }
+ };
+ auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
+ Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
+ depth * depth, shard);
+ }
void SanityCheckInputs(OpKernelContext* context) {
const Tensor& input_gradient_tensor = MklGetInput(context, kIdxGradient);
@@ -1317,59 +1281,59 @@ class MklLRNGradOp : public OpKernel {
const Tensor& orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
MklDnnShape in_grads_dnn_shape, in_image_dnn_shape, out_image_dnn_shape,
- workspace_dnn_shape;
+ workspace_dnn_shape;
GetMklShape(context, kIdxGradient, &in_grads_dnn_shape);
GetMklShape(context, kIdxOrigInput, &in_image_dnn_shape);
GetMklShape(context, kIdxOrigOutput, &out_image_dnn_shape);
GetMklShape(context, kIdxWorkspace, &workspace_dnn_shape);
if (in_grads_dnn_shape.IsMklTensor()) {
OP_REQUIRES(context, in_grads_dnn_shape.GetDimension() == 4,
- errors::InvalidArgument("Input gradient must be "
- "4-dimensional"));
+ errors::InvalidArgument("Input gradient must be "
+ "4-dimensional"));
} else {
- OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
- errors::InvalidArgument("input gradient must be 4-dimensional"));
+ OP_REQUIRES(
+ context, input_gradient_tensor.dims() == 4,
+ errors::InvalidArgument("input gradient must be 4-dimensional"));
}
if (in_image_dnn_shape.IsMklTensor()) {
OP_REQUIRES(context, in_image_dnn_shape.GetDimension() == 4,
- errors::InvalidArgument("input images must be "
- "4-dimensional"));
+ errors::InvalidArgument("input images must be "
+ "4-dimensional"));
} else {
OP_REQUIRES(context, orig_input_tensor.dims() == 4,
errors::InvalidArgument("input images must be "
- "4-dimensional"));
+ "4-dimensional"));
}
if (out_image_dnn_shape.IsMklTensor()) {
OP_REQUIRES(context, out_image_dnn_shape.GetDimension() == 4,
- errors::InvalidArgument("Output image must be "
- "4-dimensional"));
+ errors::InvalidArgument("Output image must be "
+ "4-dimensional"));
} else {
- OP_REQUIRES(context, orig_output_tensor.dims() == 4,
- errors::InvalidArgument("Output image must be 4-dimensional"));
+ OP_REQUIRES(
+ context, orig_output_tensor.dims() == 4,
+ errors::InvalidArgument("Output image must be 4-dimensional"));
}
if (workspace_enabled_) {
if (workspace_dnn_shape.IsMklTensor()) {
- OP_REQUIRES(context, workspace_dnn_shape.IsMklTensor() == false,
- errors::InvalidArgument("Workspace should not be MKL Tensor."));
+ OP_REQUIRES(
+ context, workspace_dnn_shape.IsMklTensor() == false,
+ errors::InvalidArgument("Workspace should not be MKL Tensor."));
} else {
OP_REQUIRES(context, workspace_tensor.dims() == 1,
- errors::InvalidArgument("Workspace must be 1-dimensional"));
+ errors::InvalidArgument("Workspace must be 1-dimensional"));
}
}
}
-// Input("input_grads: T")
-// Input("input_image: T")
-// Input("output_image: T")
-// Input("workspace: uint8")
- const int kIdxGradient = 0,
- kIdxOrigInput = 1,
- kIdxOrigOutput = 2,
- kIdxWorkspace = 3,
- kIdxOutput = 0;
+ // Input("input_grads: T")
+ // Input("input_image: T")
+ // Input("output_image: T")
+ // Input("workspace: uint8")
+ const int kIdxGradient = 0, kIdxOrigInput = 1, kIdxOrigOutput = 2,
+ kIdxWorkspace = 3, kIdxOutput = 0;
typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
bool workspace_enabled_;
@@ -1393,7 +1357,6 @@ class MklLRNGradOp : public OpKernel {
.Label(mkl_op_registry::kMklOpLabel), \
MklLRNGradOp<T>);
-
TF_CALL_float(REGISTER_MKL_LRN_CPU);
} // namespace tensorflow