// See docs in ../ops/nn_ops.cc. #define EIGEN_USE_THREADS #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/bias_op.h" #include "tensorflow/core/public/tensor.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; template class BiasOp : public BinaryOp { public: explicit BiasOp(OpKernelConstruction* context) : BinaryOp(context) {} void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); const Tensor& bias = context->input(1); OP_REQUIRES(context, TensorShapeUtils::IsMatrixOrHigher(input.shape()), errors::InvalidArgument("Input tensor must be at least 2D: ", input.shape().DebugString())); OP_REQUIRES(context, TensorShapeUtils::IsVector(bias.shape()), errors::InvalidArgument("Biases must be 1D: ", bias.shape().DebugString())); const auto last_dim = input.shape().dims() - 1; OP_REQUIRES( context, bias.shape().dim_size(0) == input.shape().dim_size(last_dim), errors::InvalidArgument( "Must provide as many biases as the last dimension " "of the input tensor: ", bias.shape().DebugString(), " vs. ", input.shape().DebugString())); Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); switch (input.shape().dims()) { case 2: Compute<2>(context, input, bias, output); break; case 3: Compute<3>(context, input, bias, output); break; case 4: Compute<4>(context, input, bias, output); break; case 5: Compute<5>(context, input, bias, output); break; default: OP_REQUIRES(context, false, errors::InvalidArgument("Only ranks up to 5 supported: ", input.shape().DebugString())); } } // Add biases for an input matrix of rank Dims, by using the Bias. template void Compute(OpKernelContext* ctx, const Tensor& input, const Tensor& bias, Tensor* output) { functor::Bias functor; functor(ctx->eigen_device(), input.tensor(), bias.vec(), output->tensor()); } }; #define REGISTER_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("BiasAdd").Device(DEVICE_CPU).TypeConstraint("T"), \ BiasOp); TF_CALL_NUMBER_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL #if GOOGLE_CUDA // Forward declarations of the functor specializations for GPU. namespace functor { #define DECLARE_GPU_SPEC(T, Dims) \ template <> \ void Bias::operator()( \ const GPUDevice& d, typename TTypes::ConstTensor input, \ typename TTypes::ConstVec bias, \ typename TTypes::Tensor output); \ extern template struct Bias; #define DECLARE_GPU_SPECS(T) \ DECLARE_GPU_SPEC(T, 2); \ DECLARE_GPU_SPEC(T, 3); \ DECLARE_GPU_SPEC(T, 4); \ DECLARE_GPU_SPEC(T, 5); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); } // namespace functor // Registration of the GPU implementations. #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("BiasAdd").Device(DEVICE_GPU).TypeConstraint("T"), \ BiasOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); #endif // GOOGLE_CUDA } // namespace tensorflow