// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/constant_op.h"

#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/public/tensor.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

namespace tensorflow {

ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx), tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void ConstantOp::Compute(OpKernelContext* ctx) { ctx->set_output(0, tensor_); }

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);

#if GOOGLE_CUDA
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support string constants on GPU.
#undef REGISTER_KERNEL
#endif

// HostConstantOp differs from ConstantOp in that its output is always
// in host memory.
class HostConstantOp : public OpKernel {
 public:
  explicit HostConstantOp(OpKernelConstruction* ctx)
      : OpKernel(ctx), tensor_(ctx->output_type(0)) {
    const TensorProto* proto = nullptr;
    AllocatorAttributes alloc_attr;
    alloc_attr.set_on_host(true);
    OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
    OP_REQUIRES_OK(
        ctx, ctx->device()->MakeTensorFromProto(*proto, alloc_attr, &tensor_));
    OP_REQUIRES(
        ctx, ctx->output_type(0) == tensor_.dtype(),
        errors::InvalidArgument(
            "Type mismatch between value (", DataTypeString(tensor_.dtype()),
            ") and dtype (", DataTypeString(ctx->output_type(0)), ")"));
  }

  void Compute(OpKernelContext* ctx) override { ctx->set_output(0, tensor_); }

  bool IsExpensive() override { return false; }

  ~HostConstantOp() override {}

 private:
  Tensor tensor_;
  TF_DISALLOW_COPY_AND_ASSIGN(HostConstantOp);
};

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Const")
                            .Device(DEVICE_GPU)
                            .HostMemory("output")
                            .TypeConstraint<int32>("dtype"),
                        HostConstantOp);

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

namespace functor {

// Partial specialization of FillFunctor<Device=CPUDevice, T>.
template <typename T>
struct FillFunctor<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<T>::ConstScalar in) {
    out.device(d) = out.constant(in());
  }
};
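
// Illustrative sketch (comment only, not compiled here; the names `d`, `out`,
// and `value` are assumed): a caller fills a flattened output with the value
// of a scalar tensor via this functor, which is the same pattern FillOp and
// ZerosLikeOp use later in this file.
//
//   functor::FillFunctor<CPUDevice, float> fill;
//   fill(d, out->flat<float>(), value.scalar<float>());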

// Partial specialization of SetZeroFunctor<Device=CPUDevice, T>.
template <typename T>
struct SetZeroFunctor<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T>::Flat out) {
    out.device(d) = out.constant(0);
  }
};

#define DEFINE_SETZERO_CPU(T) template struct SetZeroFunctor<CPUDevice, T>;
DEFINE_SETZERO_CPU(float);
DEFINE_SETZERO_CPU(double);
DEFINE_SETZERO_CPU(int32);
DEFINE_SETZERO_CPU(complex64);
#undef DEFINE_SETZERO_CPU

}  // end namespace functor

template <typename Device, typename T>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(context, TensorShapeUtils::IsLegacyVector(Tdims.shape()),
                errors::InvalidArgument("dims must be a vector of int32."));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(context, TensorShapeUtils::IsLegacyScalar(Tvalue.shape()),
                errors::InvalidArgument("value must be a scalar."));
    auto dims = Tdims.flat<int32>();
    for (int i = 0; i < dims.size(); i++) {
      OP_REQUIRES(context, dims(i) >= 0,
                  errors::InvalidArgument("dims[", i, "] = ", dims(i),
                                          " must be nonnegative."));
    }
    Tensor* out = nullptr;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(
            0, TensorShapeUtils::MakeShape(
                   reinterpret_cast<const int32*>(dims.data()), dims.size()),
            &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

#define REGISTER_KERNEL(D, TYPE)                         \
  REGISTER_KERNEL_BUILDER(Name("Fill")                   \
                              .Device(DEVICE_##D)        \
                              .TypeConstraint<TYPE>("T") \
                              .HostMemory("dims"),       \
                          FillOp<D##Device, TYPE>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
#undef REGISTER_CPU_KERNEL

#if GOOGLE_CUDA
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64);
// Currently we do not support filling strings and complex64 on GPU.
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL
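
// For reference, one expansion of the macro above (formatting approximate):
// REGISTER_KERNEL(GPU, float) registers FillOp<GPUDevice, float> as the float
// "Fill" kernel on GPU, with the "dims" input pinned to host memory.
//
//   REGISTER_KERNEL_BUILDER(Name("Fill")
//                               .Device(DEVICE_GPU)
//                               .TypeConstraint<float>("T")
//                               .HostMemory("dims"),
//                           FillOp<GPUDevice, float>);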
REGISTER_KERNEL_BUILDER(Name("Fill") .Device(DEVICE_GPU) .TypeConstraint("T") .HostMemory("dims") .HostMemory("value") .HostMemory("output"), FillOp); template class ZerosLikeOp : public OpKernel { public: explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); Tensor* out = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &out)); Tensor zero(DataTypeToEnum::value, {1}); zero.scalar().setZero(); const Tensor& zero_cref = zero; functor::FillFunctor functor; functor(ctx->eigen_device(), out->flat(), zero_cref.scalar()); } }; #define REGISTER_KERNEL(type, dev) \ REGISTER_KERNEL_BUILDER( \ Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint("T"), \ ZerosLikeOp) #define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU) TF_CALL_ALL_TYPES(REGISTER_CPU); #undef REGISTER_CPU #if GOOGLE_CUDA REGISTER_KERNEL(float, GPU); REGISTER_KERNEL(double, GPU); #endif // GOOGLE_CUDA #undef REGISTER_KERNEL class PlaceholderOp : public OpKernel { public: explicit PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_)); } void Compute(OpKernelContext* ctx) override { if (expected_shape_.dims() > 0) { OP_REQUIRES(ctx, false, errors::InvalidArgument( "You must feed a value for placeholder tensor '", name(), "' with dtype ", DataTypeString(output_type(0)), " and shape ", expected_shape_.DebugString())); } else { OP_REQUIRES(ctx, false, errors::InvalidArgument( "You must feed a value for placeholder tensor '", name(), "' with dtype ", DataTypeString(output_type(0)))); } } private: TensorShape expected_shape_; }; REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp); } // namespace tensorflow