From f41959ccb2d9d4c722fe8fc3351401d53bcf4900 Mon Sep 17 00:00:00 2001 From: Manjunath Kudlur Date: Fri, 6 Nov 2015 16:27:58 -0800 Subject: TensorFlow: Initial commit of TensorFlow library. TensorFlow is an open source software library for numerical computation using data flow graphs. Base CL: 107276108 --- tensorflow/core/kernels/transpose_op.cc | 190 ++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 tensorflow/core/kernels/transpose_op.cc (limited to 'tensorflow/core/kernels/transpose_op.cc') diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc new file mode 100644 index 0000000000..4f11a881f8 --- /dev/null +++ b/tensorflow/core/kernels/transpose_op.cc @@ -0,0 +1,190 @@ +// See docs in ../ops/array_ops.cc. + +#define EIGEN_USE_THREADS + +#include "tensorflow/core/kernels/transpose_op.h" +#include "tensorflow/core/kernels/transpose_op_functor.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/public/status.h" +#include "tensorflow/core/public/tensor.h" +#include "tensorflow/core/public/tensor_shape.h" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +// inv = InvertPermutationOp(T p) takes a permutation of +// integers 0, 1, ..., n - 1 and returns the inverted +// permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n). +// +// REQUIRES: input is a vector of int32. +// REQUIRES: input is a permutation of 0, 1, ..., n-1. + +class InvertPermutationOp : public OpKernel { + public: + explicit InvertPermutationOp(OpKernelConstruction* context) + : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("invert_permutation expects a 1D vector.")); + auto Tin = input.vec(); + const int N = Tin.size(); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &output)); + auto Tout = output->vec(); + std::fill_n(Tout.data(), N, -1); + for (int i = 0; i < N; ++i) { + const int32 d = Tin(i); + OP_REQUIRES(context, 0 <= d && d < N, + errors::InvalidArgument(d, " is not between 0 and ", N)); + OP_REQUIRES(context, Tout(d) == -1, + errors::InvalidArgument(d, " is duplicated in the input.")); + Tout(d) = i; + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("InvertPermutation").Device(DEVICE_CPU), + InvertPermutationOp); + +// output = TransposeOp(T input, T perm) takes a tensor +// of type T and rank N, and a permutation of 0, 1, ..., N-1. It +// shuffles the dimensions of the input tensor according to permutation. +// +// Specifically, the returned tensor output meets the following condition: +// 1) output.dims() == input.dims(); +// 2) output.dim_size(i) == input.dim_size(perm[i]); +// 3) output.tensor(i_0, i_1, ..., i_N-1) == +// input.tensor(j_0, j_1, ..., j_N-1), +// where i_s == j_{perm[s]} +// +// REQUIRES: perm is a vector of int32. +// REQUIRES: input.dims() == perm.size(). +// REQUIRES: perm is a permutation. + +template +TransposeOp::TransposeOp(OpKernelConstruction* context) + : OpKernel(context) {} + +template +void TransposeOp::Compute(OpKernelContext* context) { + const Tensor& input = context->input(0); + const Tensor& perm = context->input(1); + // Preliminary validation of sizes. + OP_REQUIRES(context, TensorShapeUtils::IsVector(perm.shape()), + errors::InvalidArgument("perm must be a vector, not ", + perm.shape().DebugString())); + auto Vperm = perm.vec(); + const int dims = input.dims(); + static const int kMinDims = 1; + static const int kMaxDims = 8; + OP_REQUIRES(context, kMinDims <= dims && dims <= kMaxDims, + errors::Unimplemented("Transposing a tensor of rank ", dims, + " is not implemented.")); + OP_REQUIRES(context, dims == Vperm.size(), + errors::InvalidArgument( + "transpose expects a vector of size ", input.dims(), + ". But input(1) is a vector of size ", Vperm.size())); + gtl::ArraySlice permutation( + reinterpret_cast(Vperm.data()), dims); + TensorShape shape; + + // Check whether permutation is a permutation of integers of [0 .. dims). + gtl::InlinedVector bits(dims); + for (const int32 d : permutation) { + OP_REQUIRES( + context, 0 <= d && d < dims, + errors::InvalidArgument(d, " is out of range [0 .. ", dims, ")")); + bits[d] = true; + shape.AddDim(input.dim_size(d)); + } + for (int i = 0; i < dims; ++i) { + OP_REQUIRES(context, bits[i], errors::InvalidArgument( + i, " is missing from {", + str_util::Join(permutation, ","), "}.")); + } + + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output)); + switch (dims) { +#define EXPAND_DIM(N) \ + case N: { \ + functor::TransposeFunctor func; \ + func(context->eigen_device(), output->tensor(), \ + input.tensor(), permutation.data()); \ + break; \ + } + EXPAND_DIM(1); + EXPAND_DIM(2); + EXPAND_DIM(3); + EXPAND_DIM(4); + EXPAND_DIM(5); + EXPAND_DIM(6); + EXPAND_DIM(7); + EXPAND_DIM(8); + default: + LOG(FATAL) << "Unexpected dims: " << dims; + } +#undef EXPAND_CASE +} + +namespace functor { + +template +void TransposeMaybeInline(const Device& d, + typename TTypes::Tensor out, + typename TTypes::ConstTensor in, + const int* perm) { + // perm[] is a permutation of 0, 1, ..., NDIMS-1. perm[] is on CPU. + Eigen::array p; + for (int i = 0; i < NDIMS; ++i) p[i] = perm[i]; + if (out.size() * sizeof(T) < 131072) { // Small transpose on a CPU: do inline + out = in.shuffle(p); + } else { + out.device(d) = in.shuffle(p); + } +} + +template +struct TransposeFunctor { + void operator()(const CPUDevice& d, typename TTypes::Tensor out, + typename TTypes::ConstTensor in, const int* perm) { + TransposeMaybeInline(d, out, in, perm); + } +}; + +} // namespace functor + +#define REGISTER(D, T) \ + template class TransposeOp; \ + REGISTER_KERNEL_BUILDER(Name("Transpose") \ + .Device(DEVICE_##D) \ + .TypeConstraint("T") \ + .HostMemory("perm"), \ + TransposeOp) +REGISTER(CPU, float); +REGISTER(CPU, double); +REGISTER(CPU, complex64); +REGISTER(CPU, uint8); +REGISTER(CPU, int8); +REGISTER(CPU, int16); +REGISTER(CPU, int32); +REGISTER(CPU, int64); +REGISTER(CPU, string); +#if GOOGLE_CUDA +REGISTER(GPU, uint8); +REGISTER(GPU, int8); +REGISTER(GPU, int16); +REGISTER(GPU, int32); +REGISTER(GPU, int64); +REGISTER(GPU, float); +REGISTER(GPU, double); +#endif +#undef REGISTER +} // namespace tensorflow -- cgit v1.2.3