Diffstat (limited to 'tensorflow/core/kernels/mkl_concat_op.cc')
-rw-r--r--  tensorflow/core/kernels/mkl_concat_op.cc  458
1 file changed, 458 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
new file mode 100644
index 0000000000..27930c44a6
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -0,0 +1,458 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include <limits>
+#include <vector>
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
+
+#include "third_party/mkl/include/mkl_dnn.h"
+#include "third_party/mkl/include/mkl_dnn_types.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM };
+
+// TODO(intelft): Check if we can reuse the existing EigenConcatOp using
+// mutable reference inputs.
+// --------------------------------------------------------------------------
+// Eigen Concat Op
+// --------------------------------------------------------------------------
+template <typename Device, typename T, AxisArgumentName AxisArgName>
+class EigenConcatBaseOp : public OpKernel {
+ public:
+ typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
+ ConstMatrixVector;
+
+ explicit EigenConcatBaseOp(OpKernelConstruction* c) : OpKernel(c) {}
+
+  // Although we add an overload of Compute that takes the input values
+  // explicitly, we still need an empty override of the single-argument
+  // Compute, because it is a pure virtual function in OpKernel.
+  void Compute(OpKernelContext* c) override {}
+
+ void Compute(OpKernelContext* c, const std::vector<Tensor>& values) {
+ const Tensor* concat_dim_tensor;
+ const char* axis_attribute_name =
+ AxisArgName == NAME_IS_AXIS
+ ? "axis"
+ : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : "<invalid>";
+ OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor));
+ OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()),
+ errors::InvalidArgument(
+ axis_attribute_name,
+ " tensor should be a scalar integer, but got shape ",
+ concat_dim_tensor->shape().DebugString()));
+ const int32 concat_dim =
+ internal::SubtleMustCopy(concat_dim_tensor->scalar<int32>()());
+ // Instead of accessing values from context, we use input to Compute.
+ const int N = values.size();
+ const int input_dims = values[0].dims();
+ const TensorShape& input_shape = values[0].shape();
+
+ int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
+ OP_REQUIRES(c,
+ (0 <= axis && axis < input_dims) ||
+ (allow_legacy_scalars() && concat_dim == 0),
+ errors::InvalidArgument(
+ "ConcatOp : Expected concatenating dimensions in the range "
+ "[",
+ -input_dims, ", ", input_dims, "), but got ", concat_dim));
+ // Note that we reduce the concat of n-dimensional tensors into a two
+ // dimensional concat. Assuming the dimensions of any input/output
+ // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
+ // the dimension indicated with size y0, we flatten it to {x, y}, where y =
+ // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
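+    // For example, concatenating two {2, 3, 5} tensors along axis 1 flattens
+    // each input to a {2, 15} matrix and produces a {2, 30} output, which
+    // corresponds to the final {2, 6, 5} result.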
+ ConstMatrixVector inputs_flat;
+ inputs_flat.reserve(N);
+ int64 inputs_flat_dim0 = 1;
+ for (int d = 0; d < axis; ++d) {
+ inputs_flat_dim0 *= input_shape.dim_size(d);
+ }
+ int64 output_concat_dim = 0;
+ const bool input_is_scalar = IsLegacyScalar(input_shape);
+ for (int i = 0; i < N; ++i) {
+ const auto in = values[i];
+ const bool in_is_scalar = IsLegacyScalar(in.shape());
+ OP_REQUIRES(
+ c, in.dims() == input_dims || (input_is_scalar && in_is_scalar),
+ errors::InvalidArgument(
+ "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
+ input_shape.DebugString(), " vs. shape[", i,
+ "] = ", in.shape().DebugString()));
+ for (int j = 0; j < input_dims; ++j) {
+ if (j == axis) {
+ continue;
+ }
+ OP_REQUIRES(
+ c, in.dim_size(j) == input_shape.dim_size(j),
+ errors::InvalidArgument(
+ "ConcatOp : Dimensions of inputs should match: shape[0] = ",
+ input_shape.DebugString(), " vs. shape[", i,
+ "] = ", in.shape().DebugString()));
+ }
+ if (in.NumElements() > 0) {
+ int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
+ inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
+ in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
+ }
+ // TODO(irving): Remove check once !allow_legacy_scalars().
+ output_concat_dim += in.dims() > 0 ? in.dim_size(axis) : 1;
+ }
+
+ TensorShape output_shape(input_shape);
+ // TODO(irving): Remove rank 0 case once !allow_legacy_scalars().
+ if (output_shape.dims() == 0) {
+ output_shape.AddDim(output_concat_dim);
+ } else {
+ output_shape.set_dim(axis, output_concat_dim);
+ }
+ Tensor* output = nullptr;
+ OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &output));
+ if (output->NumElements() > 0) {
+ int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
+ auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
+ ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
+ }
+ }
+};
+
+// --------------------------------------------------------------------------
+// Mkl Concat Op
+// --------------------------------------------------------------------------
+
+template <typename Device, typename T, AxisArgumentName AxisArgName>
+class MklConcatOp : public OpKernel {
+ private:
+ TensorFormat data_format_;
+ EigenConcatBaseOp<Device, T, AxisArgName> eigen_concat_op_;
+
+ public:
+ typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
+ ConstMatrixVector;
+
+ explicit MklConcatOp(OpKernelConstruction* c)
+ : OpKernel(c), eigen_concat_op_(c) {}
+
+ void Compute(OpKernelContext* context) override {
+ MklConcatOpContext mkl_context;
+
+ // Get input tensors.
+ OpInputList input_tensors;
+ GetMklInputList(context, "values", &input_tensors);
+ const int N = input_tensors.size();
+ // Get MKL shapes.
+ MklShapeList input_shapes(N);
+ GetMklShapeList(context, "values", &input_shapes);
+
+ // If this is Concat, then concat_dim is 0th input.
+ // If this is ConcatV2, then axis is Nth input.
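+    // That is, Concat(concat_dim, values...) vs. ConcatV2(values..., axis).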
+ const Tensor& concat_dim_tensor = AxisArgName == NAME_IS_CONCAT_DIM
+ ? MklGetInput(context, 0)
+ : MklGetInput(context, N);
+
+ // Sanity checks
+ OP_REQUIRES(
+ context, IsLegacyScalar(concat_dim_tensor.shape()),
+ errors::InvalidArgument(
+ "Concat dim tensor should be a scalar integer, but got shape ",
+ concat_dim_tensor.shape().DebugString()));
+ int32 concat_dim =
+ internal::SubtleMustCopy(concat_dim_tensor.scalar<int32>()());
+
+ MklShape& inpshape0 = input_shapes[0];
+
+ // Check that all tensors are Mkl, if not we call Eigen version.
+ bool invoke_eigen = false;
+ bool is_concat_dim_channel = true;
+ if (!AreAllMklTensors(input_shapes)) {
+ invoke_eigen = true;
+ }
+
+    // Check that every input has 4 dimensions; if not, fall back to Eigen.
+ if (!invoke_eigen) {
+ for (auto& s : input_shapes) {
+ if (s.GetDimension() != 4) {
+ invoke_eigen = true;
+ break;
+ }
+ }
+ }
+
+    // Check that concat_dim is the channel dimension; if not, fall back to
+    // the Eigen version.
+ if (!invoke_eigen) {
+ for (auto& s : input_shapes) {
+ if (!s.IsMklChannelDim(concat_dim)) {
+ invoke_eigen = true;
+ is_concat_dim_channel = false;
+ break;
+ }
+ }
+ }
+
+ if (invoke_eigen) {
+      string msg = std::string("Invoking Eigen version of Concat. Reason: ") +
+                   (!is_concat_dim_channel
+                        ? std::string("Concat dimension is not channel")
+                        : std::string("Not all inputs are 4-D Mkl tensors"));
+ VLOG(1) << "_MklConcatOp: " << msg;
+ CallEigenVersion(context, input_tensors, input_shapes);
+ return;
+ }
+
+    // In the MKL layout the channel is dimension number 2 (MklDims::C).
+    // We only reach this point when all inputs are in Mkl layout and the
+    // concat is over the channel dimension, so set concat_dim accordingly.
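+    // For example, a ConcatV2 over axis 3 of NHWC inputs (the channel axis
+    // in TF order) becomes a concat over MklDims::C here.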
+ concat_dim = MklDims::C;
+
+ // One more sanity check: check that ranks of all tensors match
+ // and that their shapes match except for concat_dim.
+ int i = 0;
+ for (auto& s : input_shapes) {
+ size_t exp_dims = inpshape0.GetDimension();
+ OP_REQUIRES(context, s.GetDimension() == exp_dims,
+ errors::InvalidArgument(
+ "_MklConcatOp : Ranks of all input tensors should match:"
+ " input dimensions = ",
+ s.GetDimension(), " vs. expected rank = ", exp_dims));
+
+ for (int d = 0; d < exp_dims; ++d) {
+ if (d == concat_dim) {
+ continue;
+ }
+
+ size_t exp_size = inpshape0.GetSizes()[d];
+ OP_REQUIRES(
+ context, exp_size == s.GetSizes()[d],
+          errors::InvalidArgument("_MklConcatOp : Dimensions of inputs "
+                                  "should match: shape[0][",
+                                  d, "] = ", exp_size, " vs. shape[", i, "][",
+ d, "] = ", s.GetSizes()[d]));
+ }
+ ++i;
+ }
+
+    // Compute the output size along the concat dimension, and reuse the
+    // input MKL layouts instead of creating new ones.
+ int64 output_concat_dim_size = 0;
+ for (auto& s : input_shapes) {
+ output_concat_dim_size +=
+ s.GetDimension() > 0 ? s.GetSizes()[concat_dim] : 1;
+ }
+ mkl_context.MklCreateInputLayouts(context, input_shapes);
+
+ CHECK_EQ(dnnConcatCreate_F32(&mkl_context.prim_concat, NULL, N,
+ &mkl_context.lt_inputs[0]),
+ E_SUCCESS);
+
+ // Calculate output sizes and strides
+ TensorFormat data_format;
+ if (inpshape0.IsTensorInNHWCFormat()) {
+ data_format = FORMAT_NHWC;
+ } else {
+ OP_REQUIRES(
+ context, inpshape0.IsTensorInNCHWFormat(),
+ errors::InvalidArgument(
+ "_MklConcat only supports all inputs in NCHW or NHWC format "));
+ data_format = FORMAT_NCHW;
+ }
+
+    // All tensors are in Mkl layout, so copy the sizes from the first input.
+ mkl_context.out_sizes[MklDims::W] = inpshape0.GetSizes()[MklDims::W];
+ mkl_context.out_sizes[MklDims::H] = inpshape0.GetSizes()[MklDims::H];
+ mkl_context.out_sizes[MklDims::C] = output_concat_dim_size;
+ mkl_context.out_sizes[MklDims::N] = inpshape0.GetSizes()[MklDims::N];
+ GetStridesFromSizes(data_format, mkl_context.out_strides,
+ mkl_context.out_sizes);
+
+ // Set output Mkl shape.
+ int64 dim = 4;
+ MklShape mkl_output_mkl_shape;
+ mkl_output_mkl_shape.SetMklTensor(true);
+ mkl_output_mkl_shape.SetMklLayout(mkl_context.prim_concat, dnnResourceDst);
+ mkl_output_mkl_shape.SetTfLayout(dim, mkl_context.out_sizes,
+ mkl_context.out_strides);
+ mkl_output_mkl_shape.SetTfDimOrder(dim, inpshape0.GetTfToMklDimMap());
+
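+    // Allocate the TF-side output as a flat buffer large enough for the MKL
+    // destination layout; the logical 4-D shape is carried in the MklShape
+    // metadata set above.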
+ TensorShape mkl_output_tf_shape;
+ mkl_output_tf_shape.AddDim(1);
+ mkl_output_tf_shape.AddDim(
+ dnnLayoutGetMemorySize_F32(
+ static_cast<dnnLayout_t>(mkl_output_mkl_shape.GetMklLayout())) /
+ sizeof(T));
+
+ Tensor* output = nullptr;
+ AllocateOutputSetMklShape(context, 0, &output, mkl_output_tf_shape,
+ mkl_output_mkl_shape);
+
+ // Set destination resource.
+ mkl_context.concat_res[dnnResourceDst] =
+ const_cast<void*>(static_cast<const void*>(output->flat<T>().data()));
+
+ mkl_context.mkl_tmp_tensors.resize(N);
+ mkl_context.MklPrepareConcatInputs(context, input_tensors);
+
+ // Execute primitive.
+ CHECK_EQ(dnnExecute_F32(mkl_context.prim_concat, mkl_context.concat_res),
+ E_SUCCESS);
+
+ mkl_context.MklCleanup();
+ }
+
+ private:
+ typedef struct {
+ TensorFormat data_format;
+ size_t out_sizes[4];
+ size_t out_strides[4];
+    dnnPrimitive_t prim_concat;           // MKL concat primitive.
+    void* concat_res[dnnResourceNumber];  // Resources passed to dnnExecute.
+    std::vector<dnnLayout_t> lt_inputs;   // Input layouts (borrowed).
+    std::vector<Tensor> mkl_tmp_tensors;  // Buffers for layout conversions.
+
+    // Create MKL dnnLayout_t objects for the tensors coming into the layer.
+    // We only support the case where all input tensors are in Mkl layout.
+ void MklCreateInputLayouts(OpKernelContext* context,
+ MklShapeList& input_shapes) {
+ for (auto& is : input_shapes) {
+ CHECK_EQ(is.IsMklTensor(), true);
+ lt_inputs.push_back((dnnLayout_t)is.GetCurLayout());
+ }
+ }
+
+ void MklPrepareConcatInputs(OpKernelContext* context,
+ OpInputList& input_tensors) {
+ CHECK_EQ(lt_inputs.size(), mkl_tmp_tensors.size());
+
+ for (int i = 0; i < lt_inputs.size(); ++i) {
+ dnnPrimitive_t mkl_prim_convert_input;
+ dnnLayout_t mkl_lt_internal_input;
+ void* mkl_buf_convert_input = nullptr;
+
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
+ &mkl_lt_internal_input, prim_concat,
+ (dnnResourceType_t)(dnnResourceMultipleSrc + i)),
+ E_SUCCESS);
+
+ if (!dnnLayoutCompare_F32(lt_inputs[i], mkl_lt_internal_input)) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input,
+ lt_inputs[i], mkl_lt_internal_input),
+ E_SUCCESS);
+
+ AllocTmpBuffer(context, &mkl_tmp_tensors[i], mkl_lt_internal_input,
+ &mkl_buf_convert_input);
+
+ CHECK_EQ(dnnConversionExecute_F32(
+ mkl_prim_convert_input,
+ const_cast<void*>(static_cast<const void*>(
+ input_tensors[i].flat<T>().data())),
+ mkl_buf_convert_input),
+ E_SUCCESS);
+
+ concat_res[dnnResourceMultipleSrc + i] = mkl_buf_convert_input;
+ CHECK_EQ(dnnDelete_F32(mkl_prim_convert_input), E_SUCCESS);
+ } else {
+ concat_res[dnnResourceMultipleSrc + i] = const_cast<void*>(
+ static_cast<const void*>(input_tensors[i].flat<T>().data()));
+ }
+
+ CHECK_EQ(dnnLayoutDelete_F32(mkl_lt_internal_input), E_SUCCESS);
+ }
+ }
+
+ void MklCleanup() {
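+      // The input layouts are owned by the corresponding MklShape objects,
+      // so only drop our references to them; delete only the concat
+      // primitive that this op created.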
+ for (auto& lt : lt_inputs) {
+ lt = nullptr;
+ }
+ CHECK_EQ(dnnDelete_F32(prim_concat), E_SUCCESS);
+ }
+ } MklConcatOpContext;
+
+ void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+ const MklShapeList& input_shapes) {
+ // Before calling Eigen version, we need to convert Mkl tensors to TF.
+ // First check that the number of input tensors and the number of Mkl
+ // shapes match.
+ CHECK_EQ(values.size(), input_shapes.size());
+
+ std::vector<Tensor> converted_values;
+ for (int i = 0; i < input_shapes.size(); i++) {
+ if (input_shapes[i].IsMklTensor()) {
+ // If input tensor is Mkl, then do the conversion.
+ Tensor tmp_tensor =
+ ConvertMklToTF<T>(context, values[i], input_shapes[i]);
+ converted_values.push_back(tmp_tensor);
+ } else {
+ // If input tensor is TF already, then we do not need any conversion.
+ converted_values.push_back(values[i]);
+ }
+ }
+
+ // Call Eigen concat.
+ eigen_concat_op_.Compute(context, converted_values);
+
+ // Set dummy Mkl tensor as output Mkl tensor for this op.
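+    // A non-Mkl MklShape on the metadata output signals to downstream MKL
+    // ops that the data output is in plain TF layout.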
+ MklShape mkl_tensor_mkl_shape;
+ mkl_tensor_mkl_shape.SetMklTensor(false);
+ mkl_tensor_mkl_shape.SetDimensions(4);
+ mkl_tensor_mkl_shape.SetTfDimOrder(4); // Dimensions
+ Tensor* mkl_tensor = nullptr;
+ TensorShape mkl_tensor_tf_shape;
+ mkl_tensor_tf_shape.AddDim(
+ SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension()));
+ int tf_output_index = 0;
+ context->allocate_output(
+ GetTensorMetaDataIndex(tf_output_index, context->num_outputs()),
+ mkl_tensor_tf_shape, &mkl_tensor);
+ mkl_tensor_mkl_shape.SerializeMklShape(
+ mkl_tensor->flat<uint8>().data(),
+ mkl_tensor->flat<uint8>().size() * sizeof(uint8));
+ }
+};
+
+/* Use optimized concat for float type only */
+#define REGISTER_MKL_CPU(type) \
+ REGISTER_KERNEL_BUILDER(Name("_MklConcat") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .HostMemory("concat_dim") \
+ .Label(mkl_op_registry::kMklOpLabel), \
+ MklConcatOp<CPUDevice, type, NAME_IS_CONCAT_DIM>) \
+ REGISTER_KERNEL_BUILDER(Name("_MklConcatV2") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int32>("Tidx") \
+ .HostMemory("axis") \
+ .Label(mkl_op_registry::kMklOpLabel), \
+ MklConcatOp<CPUDevice, type, NAME_IS_AXIS>)
+
+TF_CALL_float(REGISTER_MKL_CPU);
+
+#undef REGISTER_MKL_CPU
+} // namespace tensorflow
+
+#endif // INTEL_MKL