// See docs in ../ops/array_ops.cc. #include #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/public/tensor.h" #include "tensorflow/core/public/tensor_shape.h" namespace tensorflow { class ShapeOp : public OpKernel { public: explicit ShapeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { const Tensor& inp = ctx->input(0); const int rank = inp.dims(); Tensor* out = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({rank}), &out)); auto vec = out->vec(); for (int i = 0; i < rank; ++i) vec(i) = inp.dim_size(i); } bool IsExpensive() override { return false; } }; REGISTER_KERNEL_BUILDER(Name("Shape").Device(DEVICE_CPU).HostMemory("output"), ShapeOp); #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER(Name("Shape") \ .Device(DEVICE_GPU) \ .HostMemory("output") \ .TypeConstraint("T"), \ ShapeOp) TF_CALL_REAL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. REGISTER_KERNEL_BUILDER(Name("Shape") .Device(DEVICE_GPU) .HostMemory("input") .HostMemory("output") .TypeConstraint("T"), ShapeOp); class RankOp : public OpKernel { public: explicit RankOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { const Tensor& inp = ctx->input(0); const int rank = inp.dims(); Tensor* out = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out)); out->scalar()() = rank; } bool IsExpensive() override { return false; } }; REGISTER_KERNEL_BUILDER(Name("Rank").Device(DEVICE_CPU).HostMemory("output"), RankOp); #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER(Name("Rank") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ .HostMemory("output"), \ RankOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. REGISTER_KERNEL_BUILDER(Name("Rank") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("input") .HostMemory("output"), RankOp); class SizeOp : public OpKernel { public: explicit SizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { const Tensor& inp = ctx->input(0); const int64 size = inp.NumElements(); Tensor* out = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out)); // TODO(josh11b): switch output to int64? out->scalar()() = size; } bool IsExpensive() override { return false; } }; REGISTER_KERNEL_BUILDER(Name("Size").Device(DEVICE_CPU).HostMemory("output"), SizeOp); #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER(Name("Size") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ .HostMemory("output"), \ SizeOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. REGISTER_KERNEL_BUILDER(Name("Size") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("input") .HostMemory("output"), SizeOp); class ExpandDimsOp : public OpKernel { public: explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { int dim = ctx->input(1).flat()(0); OP_REQUIRES( ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()), errors::InvalidArgument("Tried to expand dim index ", dim, " for tensor with ", ctx->input(0).dims(), " dimensions.")); auto existing_dims = ctx->input(0).shape().dim_sizes(); std::vector new_shape(existing_dims.size()); for (size_t i = 0; i < new_shape.size(); ++i) { new_shape[i] = existing_dims[i]; } // We emulate numpy's interpretation of the dim axis when // -input.dims() >= dim <= input.dims(). if (dim < 0) { dim += existing_dims.size() + 1; } // Clamp to the end if needed. dim = std::min(dim, existing_dims.size()); new_shape.emplace(new_shape.begin() + dim, 1); const TensorShape output_shape(new_shape); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {0}, &output)); if (!output->CopyFrom(ctx->input(0), output_shape)) { // This should never happen, since the sizes of the input and output // should always be the same (we only expand the dimension with 1). ctx->SetStatus( errors::Internal("Could not expand dimension with input shape ", ctx->input(0).shape().DebugString(), " and output shape ", output_shape.DebugString())); } } }; REGISTER_KERNEL_BUILDER(Name("ExpandDims").Device(DEVICE_CPU).HostMemory("dim"), ExpandDimsOp); #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ .HostMemory("dim"), \ ExpandDimsOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL REGISTER_KERNEL_BUILDER(Name("ExpandDims") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("input") .HostMemory("dim") .HostMemory("output"), ExpandDimsOp); class SqueezeOp : public OpKernel { public: explicit SqueezeOp(OpKernelConstruction* ctx) : OpKernel(ctx) { std::vector squeeze_dims; OP_REQUIRES_OK(ctx, ctx->GetAttr("squeeze_dims", &squeeze_dims)); squeeze_dims_.insert(squeeze_dims.begin(), squeeze_dims.end()); } void Compute(OpKernelContext* ctx) override { auto existing_dims = ctx->input(0).shape().dim_sizes(); std::vector new_shape; std::unordered_set wrapped_squeeze_dims; wrapped_squeeze_dims.reserve(squeeze_dims_.size()); // Validate squeeze dims against the input. for (int32 dim : squeeze_dims_) { OP_REQUIRES( ctx, (dim >= -ctx->input(0).dims() && dim < ctx->input(0).dims()), errors::InvalidArgument("Tried to squeeze dim index ", dim, " for tensor with ", ctx->input(0).dims(), " dimensions.")); // If dim is < 0, we wrap around (-1 means the last element). if (dim < 0) { dim = existing_dims.size() + dim; } wrapped_squeeze_dims.insert(dim); } for (size_t i = 0; i < existing_dims.size(); ++i) { auto existing_dim = existing_dims[i]; // If squeeze_set is non-empty, only squeeze those dimensions. if (!wrapped_squeeze_dims.empty()) { if (wrapped_squeeze_dims.count(i) > 0) { OP_REQUIRES(ctx, existing_dim == 1, errors::InvalidArgument("Tried to explicitly squeeze " "dimension ", i, " but dimension was not 1: ", existing_dim)); } else { // This dimension is not being squeezed. new_shape.push_back(existing_dim); } } else { // Copy over all non-1-length dimensions. if (existing_dim != 1) { new_shape.push_back(existing_dim); } } } const TensorShape output_shape(new_shape); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {0}, &output)); if (!output->CopyFrom(ctx->input(0), output_shape)) { // This should never happen, since the sizes of the input and // output should always be the same. ctx->SetStatus(errors::Internal("Could not squeeze input with shape ", ctx->input(0).shape().DebugString(), " and output shape ", output_shape.DebugString())); } } private: std::unordered_set squeeze_dims_; }; REGISTER_KERNEL_BUILDER(Name("Squeeze").Device(DEVICE_CPU), SqueezeOp); #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("Squeeze").Device(DEVICE_GPU).TypeConstraint("T"), \ SqueezeOp); TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL } // namespace tensorflow