author     2015-11-06 16:27:58 -0800
committer  2015-11-06 16:27:58 -0800
commit     f41959ccb2d9d4c722fe8fc3351401d53bcf4900 (patch)
tree       ef0ca22cb2a5ac4bdec9d080d8e0788a53ed496d /tensorflow/core/kernels/shape_ops.cc
TensorFlow: Initial commit of TensorFlow library.
TensorFlow is an open source software library for numerical computation
using data flow graphs.
Base CL: 107276108
Diffstat (limited to 'tensorflow/core/kernels/shape_ops.cc')
-rw-r--r--  tensorflow/core/kernels/shape_ops.cc  261
1 file changed, 261 insertions(+), 0 deletions(-)
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
new file mode 100644
index 0000000000..7cb1da8983
--- /dev/null
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -0,0 +1,261 @@
+// See docs in ../ops/array_ops.cc.
+
+#include <unordered_set>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/public/tensor.h"
+#include "tensorflow/core/public/tensor_shape.h"
+
+namespace tensorflow {
+
+class ShapeOp : public OpKernel {
+ public:
+  explicit ShapeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& inp = ctx->input(0);
+    const int rank = inp.dims();
+    Tensor* out = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({rank}), &out));
+    auto vec = out->vec<int32>();
+    for (int i = 0; i < rank; ++i) vec(i) = inp.dim_size(i);
+  }
+
+  bool IsExpensive() override { return false; }
+};
+REGISTER_KERNEL_BUILDER(Name("Shape").Device(DEVICE_CPU).HostMemory("output"),
+                        ShapeOp);
+
+#define REGISTER_GPU_KERNEL(type)                          \
+  REGISTER_KERNEL_BUILDER(Name("Shape")                    \
+                              .Device(DEVICE_GPU)          \
+                              .HostMemory("output")        \
+                              .TypeConstraint<type>("T"),  \
+                          ShapeOp)
+TF_CALL_REAL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
+// A special GPU kernel for int32.
+// TODO(b/25387198): Also enable int32 in device memory. This kernel
+// registration requires all int32 inputs and outputs to be in host memory.
+REGISTER_KERNEL_BUILDER(Name("Shape")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("input")
+                            .HostMemory("output")
+                            .TypeConstraint<int32>("T"),
+                        ShapeOp);
+
+class RankOp : public OpKernel {
+ public:
+  explicit RankOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& inp = ctx->input(0);
+    const int rank = inp.dims();
+    Tensor* out = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out));
+    out->scalar<int32>()() = rank;
+  }
+
+  bool IsExpensive() override { return false; }
+};
+REGISTER_KERNEL_BUILDER(Name("Rank").Device(DEVICE_CPU).HostMemory("output"),
+                        RankOp);
+
+#define REGISTER_GPU_KERNEL(type)                          \
+  REGISTER_KERNEL_BUILDER(Name("Rank")                     \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint<type>("T")   \
+                              .HostMemory("output"),       \
+                          RankOp);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
+// A special GPU kernel for int32.
+// TODO(b/25387198): Also enable int32 in device memory. This kernel
+// registration requires all int32 inputs and outputs to be in host memory.
+REGISTER_KERNEL_BUILDER(Name("Rank")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .HostMemory("input")
+                            .HostMemory("output"),
+                        RankOp);
+
+class SizeOp : public OpKernel {
+ public:
+  explicit SizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& inp = ctx->input(0);
+    const int64 size = inp.NumElements();
+    Tensor* out = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out));
+    // TODO(josh11b): switch output to int64?
+    out->scalar<int32>()() = size;
+  }
+
+  bool IsExpensive() override { return false; }
+};
+REGISTER_KERNEL_BUILDER(Name("Size").Device(DEVICE_CPU).HostMemory("output"),
+                        SizeOp);
+
+#define REGISTER_GPU_KERNEL(type)                          \
+  REGISTER_KERNEL_BUILDER(Name("Size")                     \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint<type>("T")   \
+                              .HostMemory("output"),       \
+                          SizeOp);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
+// A special GPU kernel for int32.
+// TODO(b/25387198): Also enable int32 in device memory. This kernel
+// registration requires all int32 inputs and outputs to be in host memory.
+REGISTER_KERNEL_BUILDER(Name("Size")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .HostMemory("input")
+                            .HostMemory("output"),
+                        SizeOp);
+
+class ExpandDimsOp : public OpKernel {
+ public:
+  explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    int dim = ctx->input(1).flat<int>()(0);
+    OP_REQUIRES(
+        ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()),
+        errors::InvalidArgument("Tried to expand dim index ", dim,
+                                " for tensor with ", ctx->input(0).dims(),
+                                " dimensions."));
+
+    auto existing_dims = ctx->input(0).shape().dim_sizes();
+    std::vector<int64> new_shape(existing_dims.size());
+    for (size_t i = 0; i < new_shape.size(); ++i) {
+      new_shape[i] = existing_dims[i];
+    }
+
+    // We emulate numpy's interpretation of the dim axis when
+    // -input.dims() >= dim <= input.dims().
+    if (dim < 0) {
+      dim += existing_dims.size() + 1;
+    }
+
+    // Clamp to the end if needed.
+    dim = std::min<int32>(dim, existing_dims.size());
+    new_shape.emplace(new_shape.begin() + dim, 1);
+    const TensorShape output_shape(new_shape);
+
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {0}, &output));
+    if (!output->CopyFrom(ctx->input(0), output_shape)) {
+      // This should never happen, since the sizes of the input and output
+      // should always be the same (we only expand the dimension with 1).
+      ctx->SetStatus(
+          errors::Internal("Could not expand dimension with input shape ",
+                           ctx->input(0).shape().DebugString(),
+                           " and output shape ", output_shape.DebugString()));
+    }
+  }
+};
+REGISTER_KERNEL_BUILDER(Name("ExpandDims").Device(DEVICE_CPU).HostMemory("dim"),
+                        ExpandDimsOp);
+
+#define REGISTER_GPU_KERNEL(type)                          \
+  REGISTER_KERNEL_BUILDER(Name("ExpandDims")               \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint<type>("T")   \
+                              .HostMemory("dim"),          \
+                          ExpandDimsOp);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
+REGISTER_KERNEL_BUILDER(Name("ExpandDims")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .HostMemory("input")
+                            .HostMemory("dim")
+                            .HostMemory("output"),
+                        ExpandDimsOp);
+
+class SqueezeOp : public OpKernel {
+ public:
+  explicit SqueezeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    std::vector<int32> squeeze_dims;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("squeeze_dims", &squeeze_dims));
+    squeeze_dims_.insert(squeeze_dims.begin(), squeeze_dims.end());
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    auto existing_dims = ctx->input(0).shape().dim_sizes();
+    std::vector<int64> new_shape;
+
+    std::unordered_set<int32> wrapped_squeeze_dims;
+    wrapped_squeeze_dims.reserve(squeeze_dims_.size());
+    // Validate squeeze dims against the input.
+    for (int32 dim : squeeze_dims_) {
+      OP_REQUIRES(
+          ctx, (dim >= -ctx->input(0).dims() && dim < ctx->input(0).dims()),
+          errors::InvalidArgument("Tried to squeeze dim index ", dim,
+                                  " for tensor with ", ctx->input(0).dims(),
+                                  " dimensions."));
+      // If dim is < 0, we wrap around (-1 means the last element).
+      if (dim < 0) {
+        dim = existing_dims.size() + dim;
+      }
+
+      wrapped_squeeze_dims.insert(dim);
+    }
+
+    for (size_t i = 0; i < existing_dims.size(); ++i) {
+      auto existing_dim = existing_dims[i];
+
+      // If squeeze_set is non-empty, only squeeze those dimensions.
+      if (!wrapped_squeeze_dims.empty()) {
+        if (wrapped_squeeze_dims.count(i) > 0) {
+          OP_REQUIRES(ctx, existing_dim == 1,
+                      errors::InvalidArgument("Tried to explicitly squeeze "
+                                              "dimension ",
+                                              i, " but dimension was not 1: ",
+                                              existing_dim));
+        } else {
+          // This dimension is not being squeezed.
+          new_shape.push_back(existing_dim);
+        }
+      } else {
+        // Copy over all non-1-length dimensions.
+        if (existing_dim != 1) {
+          new_shape.push_back(existing_dim);
+        }
+      }
+    }
+
+    const TensorShape output_shape(new_shape);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {0}, &output));
+    if (!output->CopyFrom(ctx->input(0), output_shape)) {
+      // This should never happen, since the sizes of the input and
+      // output should always be the same.
+      ctx->SetStatus(errors::Internal("Could not squeeze input with shape ",
+                                      ctx->input(0).shape().DebugString(),
+                                      " and output shape ",
+                                      output_shape.DebugString()));
+    }
+  }
+
+ private:
+  std::unordered_set<int32> squeeze_dims_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("Squeeze").Device(DEVICE_CPU), SqueezeOp);
+
+#define REGISTER_GPU_KERNEL(type)                                    \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("Squeeze").Device(DEVICE_GPU).TypeConstraint<type>("T"),  \
+      SqueezeOp);
+TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
+}  // namespace tensorflow
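
For context, the kernels added above back the Shape, Rank, Size, ExpandDims, and Squeeze ops. The sketch below is not part of the commit; it is a minimal Python usage example, assuming a graph-mode TensorFlow build (0.x/1.x era) where tf.Session and these op wrappers are available.

# Minimal usage sketch (assumed graph-mode API, not part of this diff).
import tensorflow as tf

x = tf.constant([[1, 2, 3], [4, 5, 6]])   # a tensor with shape [2, 3]
shape = tf.shape(x)                       # ShapeOp      -> [2, 3] (int32 vector)
rank = tf.rank(x)                         # RankOp       -> 2 (int32 scalar)
size = tf.size(x)                         # SizeOp       -> 6 (int32 scalar)
expanded = tf.expand_dims(x, 0)           # ExpandDimsOp -> shape [1, 2, 3]
squeezed = tf.squeeze(expanded)           # SqueezeOp    -> shape [2, 3]

with tf.Session() as sess:
    print(sess.run([shape, rank, size]))  # [array([2, 3], dtype=int32), 2, 6]

Note that Shape, Rank, and Size are registered with HostMemory("output") (and HostMemory("input") for the int32 GPU specializations), so their small metadata results live in host memory even when the data tensor is on the GPU.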