diff options
Diffstat (limited to 'tensorflow/core/kernels/unpack_op.cc')
-rw-r--r-- | tensorflow/core/kernels/unpack_op.cc | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc new file mode 100644 index 0000000000..36cfb2c8e5 --- /dev/null +++ b/tensorflow/core/kernels/unpack_op.cc @@ -0,0 +1,96 @@ +// See docs in ../ops/array_ops.cc. + +#define EIGEN_USE_THREADS + +#include <vector> + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/split_op.h" +#include "tensorflow/core/public/status.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/public/tensor.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +template <typename Device, typename T> +class UnpackOp : public OpKernel { + public: + explicit UnpackOp(OpKernelConstruction* c) : OpKernel(c) {} + + void Compute(OpKernelContext* context) override { + const int32 num = num_outputs(); + const Tensor& input = context->input(0); + const TensorShape& input_shape = input.shape(); + + OP_REQUIRES( + context, input_shape.dims() > 0 && input_shape.dim_size(0) == num, + errors::InvalidArgument("Input shape must start with ", num, ", got ", + input_shape.ShortDebugString())); + + auto output_shape = input_shape; + output_shape.RemoveDim(0); + const int32 output_size = output_shape.num_elements(); + + // Special case: Aligned, so we can share the underlying buffer. + // + // Apply this optimization conservatively: if input is aligned, + // the resulting tensors must be aligned. It's conservative + // because if the immediate consumer of the resulting tensors are + // not using eigen for computation, its perfectly fine to avoid + // the copying. + if (output_size == 0 || IsInnerDimsSizeAligned<T>(input_shape)) { + for (int i = 0; i < num; ++i) { + Tensor output; + CHECK(output.CopyFrom(input.Slice(i, i + 1), output_shape)); + context->set_output(i, output); + } + return; + } + + // Except for shape, unpack is a special case of split, so we reuse the + // same computational kernels. + auto input_reshaped = input.shaped<T, 3>({1, num, output_size}); + + for (int i = 0; i < num; ++i) { + Tensor* output; + OP_REQUIRES_OK(context, + context->allocate_output(i, output_shape, &output)); + auto output_shaped = output->shaped<T, 3>({1, 1, output_size}); + + Eigen::DSizes<ptrdiff_t, 3> indices{0, i, 0}; + Eigen::DSizes<ptrdiff_t, 3> sizes{1, 1, output_size}; + functor::Split<Device, T>()(context->eigen_device<Device>(), + output_shaped, input_reshaped, indices, + sizes); + } + } +}; + +#define REGISTER_UNPACK(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Unpack").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ + UnpackOp<CPUDevice, type>) + +TF_CALL_ALL_TYPES(REGISTER_UNPACK); + +#undef REGISTER_UNPACK + +#if GOOGLE_CUDA + +#define REGISTER_GPU(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Unpack").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ + UnpackOp<GPUDevice, type>) + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); +#undef REGISTER_GPU + +#endif // GOOGLE_CUDA + +} // end namespace tensorflow |