// See docs in ../ops/array_ops.cc.

#include <memory>
#include <vector>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/concat_op.h"
#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/public/tensor.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/public/status.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

// --------------------------------------------------------------------------
template <typename Device, typename T>
class PackOp : public OpKernel {
 public:
  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
      ConstMatrixVector;

  explicit PackOp(OpKernelConstruction* c) : OpKernel(c) {}

  void Compute(OpKernelContext* c) override {
    OpInputList values;
    OP_REQUIRES_OK(c, c->input_list("values", &values));
    const int num = values.size();

    // Verify that all input shapes match.
    for (int i = 1; i < num; i++) {
      OP_REQUIRES(c, values[0].shape().IsSameSize(values[i].shape()),
                  errors::InvalidArgument(
                      "Shapes of all inputs must match: values[0].shape = ",
                      values[0].shape().ShortDebugString(), " != values[", i,
                      "].shape = ", values[i].shape().ShortDebugString()));
    }

    TensorShape output_shape(values[0].shape());
    output_shape.InsertDim(0, num);

    // In the num = 1 case, just reshape the input.
    if (num == 1) {
      Tensor output;
      CHECK(output.CopyFrom(values[0], output_shape));
      c->set_output(0, output);
      return;
    }

    // Allocate output.
    Tensor* output;
    OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &output));

    const int output_size = output->NumElements();
    if (output_size > 0) {
      auto output_flat = output->shaped<T, 2>({1, output_size});

      // Except for shapes, pack is a special case of concat, so we reuse the
      // same computational kernels.
      ConstMatrixVector inputs_flat;
      inputs_flat.reserve(num);
      for (int i = 0; i < num; ++i) {
        inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
            values[i].shaped<T, 2>({1, values[i].NumElements()})));
      }
      if (std::is_same<Device, GPUDevice>::value) {
        ConcatGPU<T>(c->eigen_gpu_device(), inputs_flat, &output_flat);
      } else {
        ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
      }
    }
  }
};

#define REGISTER_PACK(type)                                      \
  REGISTER_KERNEL_BUILDER(                                       \
      Name("Pack").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      PackOp<CPUDevice, type>)

TF_CALL_ALL_TYPES(REGISTER_PACK);
REGISTER_PACK(quint8);
REGISTER_PACK(qint8);
REGISTER_PACK(qint32);
REGISTER_PACK(bfloat16);
#undef REGISTER_PACK

#if GOOGLE_CUDA

#define REGISTER_GPU(type)                                       \
  REGISTER_KERNEL_BUILDER(                                       \
      Name("Pack").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      PackOp<GPUDevice, type>)

TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
#undef REGISTER_GPU

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Pack")
                            .Device(DEVICE_GPU)
                            .HostMemory("values")
                            .HostMemory("output")
                            .TypeConstraint<int32>("T"),
                        PackOp<CPUDevice, int32>);

#endif  // GOOGLE_CUDA

}  // namespace tensorflow
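
// ---------------------------------------------------------------------------
// A minimal standalone sketch (not part of the kernel above; the function
// name and signature are hypothetical) of why Pack can reuse the Concat
// kernels: in row-major layout, stacking `num` same-shape tensors along a
// new leading axis writes exactly the same bytes as concatenating their
// flattened {1, NumElements()} views end to end, which is what Compute()
// does via ConcatCPU/ConcatGPU.
#include <cstddef>
#include <vector>

template <typename T>
void PackByConcat(const std::vector<const T*>& inputs, std::size_t elems,
                  T* out) {
  // Slice i of the packed result occupies out[i * elems, (i + 1) * elems),
  // so copying each flattened input in order reproduces the packed layout.
  for (std::size_t i = 0; i < inputs.size(); ++i) {
    for (std::size_t j = 0; j < elems; ++j) {
      out[i * elems + j] = inputs[i][j];
    }
  }
}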