Diffstat (limited to 'tensorflow/core/kernels/mkl_conv_ops.cc')
-rw-r--r--  tensorflow/core/kernels/mkl_conv_ops.cc  391
 1 file changed, 194 insertions(+), 197 deletions(-)
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 5a9a82d2e9..e5c4c21a10 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -43,7 +43,6 @@ limitations under the License.
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-typedef Eigen::GpuDevice GPUDevice;
template <typename Device, typename T, bool biasEnabled>
class MklConv2DOp : public OpKernel {
@@ -70,9 +69,10 @@ class MklConv2DOp : public OpKernel {
}
void Compute(OpKernelContext* context) override {
+ MklConv2DOpContext mkl_context;
const Tensor& input = MklGetInput(context, 0);
- GetMklShape(context, 0, &(mkl_params_.input_shape));
- bool input_in_mkl_format = mkl_params_.input_shape.IsMklTensor();
+ GetMklShape(context, 0, &(mkl_context.input_shape));
+ bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
const Tensor& filter = MklGetInput(context, 1);
MklShape mkl_filter_shape;
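[Note on the hunk above: each kernel input carries both a data tensor and an MklShape describing whether the data is in MKL's blocked layout. A minimal sketch of that input-fetching pattern, using only the helpers visible in this diff (MklGetInput, GetMklShape, MklShape::IsMklTensor):

    const Tensor& input = MklGetInput(context, 0);  // the data itself
    MklShape input_shape;
    GetMklShape(context, 0, &input_shape);          // layout metadata
    if (input_shape.IsMklTensor()) {
      // dimensions must be read through the MklShape, not the Tensor
    }

The change here stores that metadata in a stack-local MklConv2DOpContext instead of the former mkl_params_ class member.]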
@@ -104,9 +104,9 @@ class MklConv2DOp : public OpKernel {
errors::InvalidArgument("filter too large"));
}
- const int64 input_depth = input_in_mkl_format
- ? mkl_params_.input_shape.GetSizes()[2]
- : GetTensorDim(input, data_format_, 'C');
+ const int64 input_depth =
+ input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'C')
+ : GetTensorDim(input, data_format_, 'C');
OP_REQUIRES(context, input_depth == filter.dim_size(2),
errors::InvalidArgument(
"input and filter must have the same depth: ", input_depth,
@@ -116,9 +116,9 @@ class MklConv2DOp : public OpKernel {
// The second dimension for input is rows/height.
// The first dimension for filter is rows/height.
- const int64 input_rows_raw = input_in_mkl_format
- ? mkl_params_.input_shape.GetSizes()[1]
- : GetTensorDim(input, data_format_, 'H');
+ const int64 input_rows_raw =
+ input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'H')
+ : GetTensorDim(input, data_format_, 'H');
OP_REQUIRES(
context,
FastBoundsCheck(input_rows_raw, std::numeric_limits<int>::max()),
@@ -128,9 +128,9 @@ class MklConv2DOp : public OpKernel {
// The third dimension for input is columns/width.
// The second dimension for filter is columns/width.
- const int64 input_cols_raw = input_in_mkl_format
- ? mkl_params_.input_shape.GetSizes()[0]
- : GetTensorDim(input, data_format_, 'W');
+ const int64 input_cols_raw =
+ input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'W')
+ : GetTensorDim(input, data_format_, 'W');
OP_REQUIRES(
context,
FastBoundsCheck(input_cols_raw, std::numeric_limits<int>::max()),
@@ -139,9 +139,9 @@ class MklConv2DOp : public OpKernel {
const int filter_cols = static_cast<int>(filter.dim_size(1));
// The first dimension for input is batch.
- const int64 input_batch_raw = input_in_mkl_format
- ? mkl_params_.input_shape.GetSizes()[3]
- : GetTensorDim(input, data_format_, 'N');
+ const int64 input_batch_raw =
+ input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'N')
+ : GetTensorDim(input, data_format_, 'N');
OP_REQUIRES(
context,
FastBoundsCheck(input_batch_raw, std::numeric_limits<int>::max()),
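[Note: the four hunks above all make the same change: hard-coded GetSizes()[0..3] indices are replaced by named lookups through GetMklTensorDim, so the code no longer assumes a fixed W, H, C, N ordering inside the MklShape. A hedged sketch of the resulting pattern (values illustrative only; FastBoundsCheck guards the int64-to-int narrowing that follows):

    const int64 rows_raw = input_in_mkl_format
                               ? GetMklTensorDim(mkl_context.input_shape, 'H')
                               : GetTensorDim(input, data_format_, 'H');
    OP_REQUIRES(context,
                FastBoundsCheck(rows_raw, std::numeric_limits<int>::max()),
                errors::InvalidArgument("Input rows too large"));
    const int rows = static_cast<int>(rows_raw);  // safe to narrow now
]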
@@ -184,98 +184,105 @@ class MklConv2DOp : public OpKernel {
}
// Create MKL convolution primitives
- mkl_params_.in_dims = input_in_mkl_format
- ? mkl_params_.input_shape.GetDimension()
+ mkl_context.in_dims = input_in_mkl_format
+ ? mkl_context.input_shape.GetDimension()
: input.dims();
- mkl_params_.filter_dims = filter.dims();
- mkl_params_.in_sizes[0] = static_cast<size_t>(input_cols);
- mkl_params_.in_sizes[1] = static_cast<size_t>(input_rows);
- mkl_params_.in_sizes[2] = static_cast<size_t>(input_depth);
- mkl_params_.in_sizes[3] = static_cast<size_t>(batch);
- mkl_params_.out_sizes[0] = static_cast<size_t>(out_cols);
- mkl_params_.out_sizes[1] = static_cast<size_t>(out_rows);
- mkl_params_.out_sizes[2] = static_cast<size_t>(out_depth);
- mkl_params_.out_sizes[3] = static_cast<size_t>(batch);
- mkl_params_.input_offset[0] = static_cast<int>(-pad_cols);
- mkl_params_.input_offset[1] = static_cast<int>(-pad_rows);
- mkl_params_.conv_stride[0] = static_cast<size_t>(stride_cols);
- mkl_params_.conv_stride[1] = static_cast<size_t>(stride_rows);
-
- GetStridesFromSizes(data_format_, mkl_params_.out_strides,
- mkl_params_.out_sizes);
- GetStridesFromSizes(data_format_, mkl_params_.in_strides,
- mkl_params_.in_sizes);
+ mkl_context.filter_dims = filter.dims();
+
+ mkl_context.in_sizes[MklDims::W] = static_cast<size_t>(input_cols);
+ mkl_context.in_sizes[MklDims::H] = static_cast<size_t>(input_rows);
+ mkl_context.in_sizes[MklDims::C] = static_cast<size_t>(input_depth);
+ mkl_context.in_sizes[MklDims::N] = static_cast<size_t>(batch);
+
+ mkl_context.out_sizes[MklDims::W] = static_cast<size_t>(out_cols);
+ mkl_context.out_sizes[MklDims::H] = static_cast<size_t>(out_rows);
+ mkl_context.out_sizes[MklDims::C] = static_cast<size_t>(out_depth);
+ mkl_context.out_sizes[MklDims::N] = static_cast<size_t>(batch);
+
+ mkl_context.input_offset[0] = static_cast<int>(-pad_cols);
+ mkl_context.input_offset[1] = static_cast<int>(-pad_rows);
+
+ mkl_context.conv_stride[0] = static_cast<size_t>(stride_cols);
+ mkl_context.conv_stride[1] = static_cast<size_t>(stride_rows);
+
+ GetStridesFromSizes(data_format_, mkl_context.out_strides,
+ mkl_context.out_sizes);
+ GetStridesFromSizes(data_format_, mkl_context.in_strides,
+ mkl_context.in_sizes);
// TF filter dimension order (out_depth, in_depth, cols, rows) ->
// MKL filter dimension order (out_depth, in_depth, rows, cols)
- mkl_params_.filter_sizes[0] = filter.dim_size(1); // cols
- mkl_params_.filter_sizes[1] = filter.dim_size(0); // rows
- mkl_params_.filter_sizes[2] = filter.dim_size(2); // in_depth
- mkl_params_.filter_sizes[3] = filter.dim_size(3); // out_depth
+ mkl_context.filter_sizes[0] = filter.dim_size(1); // cols
+ mkl_context.filter_sizes[1] = filter.dim_size(0); // rows
+ mkl_context.filter_sizes[2] = filter.dim_size(2); // in_depth
+ mkl_context.filter_sizes[3] = filter.dim_size(3); // out_depth
// TF filter layout - (rows, cols, in_depth, out_depth)
- mkl_params_.filter_strides[0] =
+ mkl_context.filter_strides[0] =
filter.dim_size(2) * filter.dim_size(3); // cols
- mkl_params_.filter_strides[1] =
+ mkl_context.filter_strides[1] =
filter.dim_size(1) * filter.dim_size(2) * filter.dim_size(3); // rows
- mkl_params_.filter_strides[2] = filter.dim_size(3); // in_depth
- mkl_params_.filter_strides[3] = 1; // out_depth
+ mkl_context.filter_strides[2] = filter.dim_size(3); // in_depth
+ mkl_context.filter_strides[3] = 1; // out_depth
if (biasEnabled) {
const Tensor& bias = MklGetInput(context, 2);
- mkl_params_.bias_sizes[0] = {static_cast<size_t>(bias.dim_size(0))};
- mkl_params_.bias_strides[0] = {1};
+ mkl_context.bias_sizes[0] = {static_cast<size_t>(bias.dim_size(0))};
+ mkl_context.bias_strides[0] = {1};
}
// Create Convolution Primitive
if (biasEnabled) {
- CHECK_EQ(dnnConvolutionCreateForwardBias_F32(
- &mkl_prim_convolution_fwd_, nullptr,
- dnnAlgorithmConvolutionDirect, mkl_params_.in_dims,
- mkl_params_.in_sizes, mkl_params_.out_sizes,
- mkl_params_.filter_sizes, mkl_params_.conv_stride,
- mkl_params_.input_offset, dnnBorderZeros),
- E_SUCCESS);
+ CHECK_EQ(
+ dnnConvolutionCreateForwardBias_F32(
+ &mkl_context.prim_fwd, nullptr, dnnAlgorithmConvolutionDirect,
+ mkl_context.in_dims, mkl_context.in_sizes, mkl_context.out_sizes,
+ mkl_context.filter_sizes, mkl_context.conv_stride,
+ mkl_context.input_offset, dnnBorderZeros),
+ E_SUCCESS);
} else {
- CHECK_EQ(dnnConvolutionCreateForward_F32(
- &mkl_prim_convolution_fwd_, nullptr,
- dnnAlgorithmConvolutionDirect, mkl_params_.in_dims,
- mkl_params_.in_sizes, mkl_params_.out_sizes,
- mkl_params_.filter_sizes, mkl_params_.conv_stride,
- mkl_params_.input_offset, dnnBorderZeros),
- E_SUCCESS);
+ CHECK_EQ(
+ dnnConvolutionCreateForward_F32(
+ &mkl_context.prim_fwd, nullptr, dnnAlgorithmConvolutionDirect,
+ mkl_context.in_dims, mkl_context.in_sizes, mkl_context.out_sizes,
+ mkl_context.filter_sizes, mkl_context.conv_stride,
+ mkl_context.input_offset, dnnBorderZeros),
+ E_SUCCESS);
}
TensorShape mkl_output_tf_shape;
MklShape mkl_output_mkl_shape;
mkl_output_mkl_shape.SetMklTensor(true);
- mkl_output_mkl_shape.SetMklLayout(mkl_prim_convolution_fwd_,
- dnnResourceDst);
- mkl_output_mkl_shape.SetTfLayout(mkl_params_.in_dims, mkl_params_.out_sizes,
- mkl_params_.out_strides);
+ mkl_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, dnnResourceDst);
+ mkl_output_mkl_shape.SetTfLayout(mkl_context.in_dims, mkl_context.out_sizes,
+ mkl_context.out_strides);
+ // MKL might change the dimension ordering
+ // Create mapping to recover the original TF dimension order
+ mkl_output_mkl_shape.SetTfDimOrder(mkl_context.in_dims, data_format_);
+
mkl_output_tf_shape.AddDim(
dnnLayoutGetMemorySize_F32(
static_cast<dnnLayout_t>(mkl_output_mkl_shape.GetMklLayout())) /
sizeof(T));
AllocateOutputSetMklshape(context, 0, &output, mkl_output_tf_shape,
mkl_output_mkl_shape);
- mkl_conv_res_[dnnResourceDst] =
+ mkl_context.conv_res[dnnResourceDst] =
static_cast<void*>(output->flat<T>().data());
- MklCreateInputLayouts(context);
+ mkl_context.MklCreateInputLayouts(context);
Tensor mkl_tmp_input_buf_tensor, mkl_tmp_filter_buf_tensor,
mkl_tmp_bias_buf_tensor; // Temp tensor used to allocate tmp
// buffers
- MklPrepareConvolutionInputs(context, &mkl_tmp_input_buf_tensor,
- &mkl_tmp_filter_buf_tensor,
- &mkl_tmp_bias_buf_tensor);
+ mkl_context.MklPrepareConvolutionInputs(context, &mkl_tmp_input_buf_tensor,
+ &mkl_tmp_filter_buf_tensor,
+ &mkl_tmp_bias_buf_tensor);
// Execute convolution
- CHECK_EQ(dnnExecute_F32(mkl_prim_convolution_fwd_, mkl_conv_res_),
+ CHECK_EQ(dnnExecute_F32(mkl_context.prim_fwd, mkl_context.conv_res),
E_SUCCESS);
- MklCleanup();
+ mkl_context.MklCleanup();
}
private:
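[Note on the filter-layout block in the hunk above: TF stores filters as HWIO (rows, cols, in_depth, out_depth), while the MKL size array is filled as (cols, rows, in_depth, out_depth); the stride array then tells MKL where each of those dimensions lives in the untouched TF buffer. A worked example with assumed dimensions makes the stride arithmetic concrete:

    // Assume a 3x3 filter with 64 input and 128 output channels (HWIO):
    //   dim_size(0)=3 rows, dim_size(1)=3 cols,
    //   dim_size(2)=64 in_depth, dim_size(3)=128 out_depth
    // Element strides in the contiguous TF buffer:
    //   out_depth: 1
    //   in_depth:  128                 (= dim_size(3))
    //   cols:      64 * 128 = 8192     (= dim_size(2) * dim_size(3))
    //   rows:      3 * 64 * 128 = 24576
    size_t filter_sizes[4]   = {3, 3, 64, 128};        // cols, rows, in, out
    size_t filter_strides[4] = {8192, 24576, 128, 1};  // matches the hunk
]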
@@ -293,151 +300,141 @@ class MklConv2DOp : public OpKernel {
int input_offset[2];
size_t conv_stride[2];
MklShape input_shape;
- } MklConv2DOpParams;
-
- // Create MKL dnnLayout_t objects for tensors coming into the layer
- void MklCreateInputLayouts(OpKernelContext* context) {
- bool input_in_mkl_format = mkl_params_.input_shape.IsMklTensor();
- if (input_in_mkl_format) {
- mkl_lt_input_ =
- static_cast<dnnLayout_t>(mkl_params_.input_shape.GetCurLayout());
- } else {
- CHECK_EQ(
- dnnLayoutCreate_F32(&mkl_lt_input_, mkl_params_.in_dims,
- mkl_params_.in_sizes, mkl_params_.in_strides),
- E_SUCCESS);
- }
-
- CHECK_EQ(dnnLayoutCreate_F32(&mkl_lt_filter_, mkl_params_.filter_dims,
- mkl_params_.filter_sizes,
- mkl_params_.filter_strides),
- E_SUCCESS);
+ dnnPrimitive_t prim_fwd;
+ void* conv_res[dnnResourceNumber];
+ dnnLayout_t lt_filter, lt_bias, lt_input;
+
+ // Create MKL dnnLayout_t objects for tensors coming into the layer
+ void MklCreateInputLayouts(OpKernelContext* context) {
+ bool input_in_mkl_format = input_shape.IsMklTensor();
+ if (input_in_mkl_format) {
+ lt_input = static_cast<dnnLayout_t>(input_shape.GetCurLayout());
+ } else {
+ CHECK_EQ(dnnLayoutCreate_F32(&lt_input, in_dims, in_sizes, in_strides),
+ E_SUCCESS);
+ }
- if (biasEnabled) {
- CHECK_EQ(dnnLayoutCreate_F32(&mkl_lt_bias_, 1, mkl_params_.bias_sizes,
- mkl_params_.bias_strides),
+ CHECK_EQ(dnnLayoutCreate_F32(&lt_filter, filter_dims, filter_sizes,
+ filter_strides),
E_SUCCESS);
- }
- }
- // Compare incoming tensor layouts with MKL preferred layouts and convert
- // data to the preferred layout if necessary
- void MklPrepareConvolutionInputs(OpKernelContext* context,
- Tensor* mkl_tmp_input_buf_tensor,
- Tensor* mkl_tmp_filter_buf_tensor,
- Tensor* mkl_tmp_bias_buf_tensor) {
- bool mkl_convert_input, mkl_convert_filter, mkl_convert_bias;
- dnnPrimitive_t mkl_prim_convert_filter, mkl_prim_convert_bias,
- mkl_prim_convert_input;
- dnnLayout_t mkl_lt_internal_filter, mkl_lt_internal_bias,
- mkl_lt_internal_input;
- void *mkl_buf_convert_input, *mkl_buf_convert_filter, *mkl_buf_convert_bias;
- mkl_prim_convert_filter = nullptr;
- mkl_prim_convert_bias = nullptr;
- mkl_prim_convert_input = nullptr;
- mkl_lt_internal_filter = nullptr;
- mkl_lt_internal_bias = nullptr;
- mkl_lt_internal_input = nullptr;
- mkl_buf_convert_input = nullptr;
- mkl_buf_convert_filter = nullptr;
- mkl_buf_convert_bias = nullptr;
-
- // Compare with internal layouts and convert if needed
- const Tensor& input = MklGetInput(context, 0);
- void* mkl_buf_input =
- const_cast<void*>(static_cast<const void*>(input.flat<T>().data()));
- CHECK_EQ(
- dnnLayoutCreateFromPrimitive_F32(
- &mkl_lt_internal_input, mkl_prim_convolution_fwd_, dnnResourceSrc),
- E_SUCCESS);
- mkl_convert_input =
- !dnnLayoutCompare_F32(mkl_lt_internal_input, mkl_lt_input_);
- if (mkl_convert_input) {
- CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input, mkl_lt_input_,
- mkl_lt_internal_input),
- E_SUCCESS);
- AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, mkl_lt_internal_input,
- &mkl_buf_convert_input);
- CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_input, mkl_buf_input,
- mkl_buf_convert_input),
- E_SUCCESS);
- dnnDelete_F32(mkl_prim_convert_input);
+ if (biasEnabled) {
+ CHECK_EQ(dnnLayoutCreate_F32(&lt_bias, 1, bias_sizes, bias_strides),
+ E_SUCCESS);
+ }
}
- dnnLayoutDelete_F32(mkl_lt_internal_input);
-
- mkl_conv_res_[dnnResourceSrc] =
- (mkl_convert_input) ? mkl_buf_convert_input : mkl_buf_input;
- const Tensor& filter = MklGetInput(context, 1);
- void* mkl_buf_filter =
- const_cast<void*>(static_cast<const void*>(filter.flat<T>().data()));
- CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_filter,
- mkl_prim_convolution_fwd_,
- dnnResourceFilter),
- E_SUCCESS);
- mkl_convert_filter =
- !dnnLayoutCompare_F32(mkl_lt_internal_filter, mkl_lt_filter_);
- if (mkl_convert_filter) {
- CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, mkl_lt_filter_,
- mkl_lt_internal_filter),
- E_SUCCESS);
- AllocTmpBuffer(context, mkl_tmp_filter_buf_tensor, mkl_lt_internal_filter,
- &mkl_buf_convert_filter);
- CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter,
- mkl_buf_convert_filter),
+ // Compare incoming tensor layouts with MKL preferred layouts and convert
+ // data to the preferred layout if necessary
+ void MklPrepareConvolutionInputs(OpKernelContext* context,
+ Tensor* mkl_tmp_input_buf_tensor,
+ Tensor* mkl_tmp_filter_buf_tensor,
+ Tensor* mkl_tmp_bias_buf_tensor) {
+ bool mkl_convert_input, mkl_convert_filter, mkl_convert_bias;
+ dnnPrimitive_t mkl_prim_convert_filter, mkl_prim_convert_bias,
+ mkl_prim_convert_input;
+ dnnLayout_t mkl_lt_internal_filter, mkl_lt_internal_bias,
+ mkl_lt_internal_input;
+ void *mkl_buf_convert_input, *mkl_buf_convert_filter,
+ *mkl_buf_convert_bias;
+ mkl_prim_convert_filter = nullptr;
+ mkl_prim_convert_bias = nullptr;
+ mkl_prim_convert_input = nullptr;
+ mkl_lt_internal_filter = nullptr;
+ mkl_lt_internal_bias = nullptr;
+ mkl_lt_internal_input = nullptr;
+ mkl_buf_convert_input = nullptr;
+ mkl_buf_convert_filter = nullptr;
+ mkl_buf_convert_bias = nullptr;
+
+ // Compare with internal layouts and convert if needed
+ const Tensor& input = MklGetInput(context, 0);
+ void* mkl_buf_input =
+ const_cast<void*>(static_cast<const void*>(input.flat<T>().data()));
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_input,
+ prim_fwd, dnnResourceSrc),
E_SUCCESS);
- dnnDelete_F32(mkl_prim_convert_filter);
- }
- dnnLayoutDelete_F32(mkl_lt_internal_filter);
+ mkl_convert_input =
+ !dnnLayoutCompare_F32(mkl_lt_internal_input, lt_input);
+ if (mkl_convert_input) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input, lt_input,
+ mkl_lt_internal_input),
+ E_SUCCESS);
+ AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, mkl_lt_internal_input,
+ &mkl_buf_convert_input);
+ CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_input, mkl_buf_input,
+ mkl_buf_convert_input),
+ E_SUCCESS);
+ dnnDelete_F32(mkl_prim_convert_input);
+ }
+ dnnLayoutDelete_F32(mkl_lt_internal_input);
- mkl_conv_res_[dnnResourceFilter] =
- (mkl_convert_filter) ? mkl_buf_convert_filter : mkl_buf_filter;
+ conv_res[dnnResourceSrc] =
+ (mkl_convert_input) ? mkl_buf_convert_input : mkl_buf_input;
- if (biasEnabled) {
- const Tensor& bias = MklGetInput(context, 2);
- void* mkl_buf_bias =
- const_cast<void*>(static_cast<const void*>(bias.flat<T>().data()));
- CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_bias,
- mkl_prim_convolution_fwd_,
- dnnResourceBias),
+ const Tensor& filter = MklGetInput(context, 1);
+ void* mkl_buf_filter =
+ const_cast<void*>(static_cast<const void*>(filter.flat<T>().data()));
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_filter,
+ prim_fwd, dnnResourceFilter),
E_SUCCESS);
- mkl_convert_bias =
- !dnnLayoutCompare_F32(mkl_lt_internal_bias, mkl_lt_bias_);
- if (mkl_convert_bias) {
- CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_bias, mkl_lt_bias_,
- mkl_lt_internal_bias),
+ mkl_convert_filter =
+ !dnnLayoutCompare_F32(mkl_lt_internal_filter, lt_filter);
+ if (mkl_convert_filter) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, lt_filter,
+ mkl_lt_internal_filter),
E_SUCCESS);
- AllocTmpBuffer(context, mkl_tmp_bias_buf_tensor, mkl_lt_internal_bias,
- &mkl_buf_convert_bias);
- CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_bias, mkl_buf_bias,
- mkl_buf_convert_bias),
- E_SUCCESS);
- dnnDelete_F32(mkl_prim_convert_bias);
+ AllocTmpBuffer(context, mkl_tmp_filter_buf_tensor,
+ mkl_lt_internal_filter, &mkl_buf_convert_filter);
+ CHECK_EQ(
+ dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter,
+ mkl_buf_convert_filter),
+ E_SUCCESS);
+ dnnDelete_F32(mkl_prim_convert_filter);
}
- dnnLayoutDelete_F32(mkl_lt_internal_bias);
+ dnnLayoutDelete_F32(mkl_lt_internal_filter);
+
+ conv_res[dnnResourceFilter] =
+ (mkl_convert_filter) ? mkl_buf_convert_filter : mkl_buf_filter;
- mkl_conv_res_[dnnResourceBias] =
- (mkl_convert_bias) ? mkl_buf_convert_bias : mkl_buf_bias;
+ if (biasEnabled) {
+ const Tensor& bias = MklGetInput(context, 2);
+ void* mkl_buf_bias =
+ const_cast<void*>(static_cast<const void*>(bias.flat<T>().data()));
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_bias,
+ prim_fwd, dnnResourceBias),
+ E_SUCCESS);
+ mkl_convert_bias = !dnnLayoutCompare_F32(mkl_lt_internal_bias, lt_bias);
+ if (mkl_convert_bias) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_bias, lt_bias,
+ mkl_lt_internal_bias),
+ E_SUCCESS);
+ AllocTmpBuffer(context, mkl_tmp_bias_buf_tensor, mkl_lt_internal_bias,
+ &mkl_buf_convert_bias);
+ CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_bias, mkl_buf_bias,
+ mkl_buf_convert_bias),
+ E_SUCCESS);
+ dnnDelete_F32(mkl_prim_convert_bias);
+ }
+ dnnLayoutDelete_F32(mkl_lt_internal_bias);
+
+ conv_res[dnnResourceBias] =
+ (mkl_convert_bias) ? mkl_buf_convert_bias : mkl_buf_bias;
+ }
}
- }
- void MklCleanup() {
- bool input_in_mkl_format = mkl_params_.input_shape.IsMklTensor();
- dnnDelete_F32(mkl_prim_convolution_fwd_);
- if (!input_in_mkl_format) dnnLayoutDelete_F32(mkl_lt_input_);
- dnnLayoutDelete_F32(mkl_lt_filter_);
- if (biasEnabled) dnnLayoutDelete_F32(mkl_lt_bias_);
- }
+ void MklCleanup() {
+ bool input_in_mkl_format = input_shape.IsMklTensor();
+ dnnDelete_F32(prim_fwd);
+ if (!input_in_mkl_format) dnnLayoutDelete_F32(lt_input);
+ dnnLayoutDelete_F32(lt_filter);
+ if (biasEnabled) dnnLayoutDelete_F32(lt_bias);
+ }
+ } MklConv2DOpContext;
std::vector<int32> strides_;
Padding padding_;
TensorFormat data_format_;
-
- MklConv2DOpParams mkl_params_;
- dnnPrimitive_t mkl_prim_convolution_fwd_ = nullptr;
- void* mkl_conv_res_[dnnResourceNumber];
- dnnLayout_t mkl_lt_filter_ = nullptr, mkl_lt_bias_ = nullptr,
- mkl_lt_input_ = nullptr;
};
#define REGISTER_MKL_CPU(T) \
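[Note on the final hunk: the layout helpers and their dnnPrimitive_t/dnnLayout_t handles move from class members into the stack-allocated MklConv2DOpContext, presumably so concurrent Compute() calls no longer share mutable state. MklPrepareConvolutionInputs applies one compare-and-convert pattern to input, filter, and bias alike; a condensed sketch of that pattern, using the same dnn* calls that appear in the diff (user_buf, user_layout, resource, and tmp_tensor are placeholder names, not identifiers from the source):

    // Convert user_buf from user_layout to the layout the convolution
    // primitive expects for this resource slot, only if they differ.
    dnnLayout_t internal_layout = nullptr;
    CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&internal_layout, prim_fwd,
                                              resource),
             E_SUCCESS);
    void* res_buf = user_buf;
    if (!dnnLayoutCompare_F32(internal_layout, user_layout)) {
      dnnPrimitive_t cv = nullptr;
      CHECK_EQ(dnnConversionCreate_F32(&cv, user_layout, internal_layout),
               E_SUCCESS);
      AllocTmpBuffer(context, tmp_tensor, internal_layout, &res_buf);
      CHECK_EQ(dnnConversionExecute_F32(cv, user_buf, res_buf), E_SUCCESS);
      dnnDelete_F32(cv);  // the conversion primitive is one-shot
    }
    dnnLayoutDelete_F32(internal_layout);
    conv_res[resource] = res_buf;  // feed the (possibly converted) buffer
]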