1 files changed, 112 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc
new file mode 100644
index 0000000000..68737f6c2d
--- /dev/null
+++ b/tensorflow/core/kernels/bias_op.cc
@@ -0,0 +1,112 @@
+// See docs in ../ops/nn_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/kernels/bias_op.h"
+#include "tensorflow/core/public/tensor.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;  // Device used by the CPU kernels.
+typedef Eigen::GpuDevice GPUDevice;         // Device used by the GPU kernels.
+
+template <typename Device, typename T>
+class BiasOp : public BinaryOp<T> {  // Registered below as "BiasAdd".
+ public:
+  explicit BiasOp(OpKernelConstruction* context) : BinaryOp<T>(context) {}
+  // Validates both inputs, allocates output 0, then dispatches on input rank.
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    const Tensor& bias = context->input(1);
+    // Require: input rank >= 2, bias rank 1, bias size == last input dim.
+    OP_REQUIRES(context, TensorShapeUtils::IsMatrixOrHigher(input.shape()),
+                errors::InvalidArgument("Input tensor must be at least 2D: ",
+                                        input.shape().DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(bias.shape()),
+                errors::InvalidArgument("Biases must be 1D: ",
+                                        bias.shape().DebugString()));
+    const auto last_dim = input.shape().dims() - 1;  // Index of the last axis.
+    OP_REQUIRES(
+        context, bias.shape().dim_size(0) == input.shape().dim_size(last_dim),
+        errors::InvalidArgument(
+            "Must provide as many biases as the last dimension "
+            "of the input tensor: ",
+            bias.shape().DebugString(), " vs. ", input.shape().DebugString()));
+    // The output is allocated with exactly the input's shape.
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    // Dims is a template argument of the helper, so map the runtime rank to it.
+    switch (input.shape().dims()) {
+      case 2:
+        Compute<2>(context, input, bias, output);
+        break;
+      case 3:
+        Compute<3>(context, input, bias, output);
+        break;
+      case 4:
+        Compute<4>(context, input, bias, output);
+        break;
+      case 5:
+        Compute<5>(context, input, bias, output);
+        break;
+      default:  // Any rank without a case above is rejected.
+        OP_REQUIRES(context, false,
+                    errors::InvalidArgument("Only ranks up to 5 supported: ",
+                                            input.shape().DebugString()));
+    }
+  }
+  // Rank-specific helper: runs functor::Bias<Device, T, Dims> on the context's
+  // Eigen device for rank-Dims input/output; the bias is applied in the functor.
+  template <int Dims>
+  void Compute(OpKernelContext* ctx, const Tensor& input, const Tensor& bias,
+               Tensor* output) {
+    functor::Bias<Device, T, Dims> functor;
+    functor(ctx->eigen_device<Device>(), input.tensor<T, Dims>(), bias.vec<T>(),
+            output->tensor<T, Dims>());
+  }
+};
+
+#define REGISTER_KERNEL(type)                                       \
+  REGISTER_KERNEL_BUILDER(                                          \
+      Name("BiasAdd").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+      BiasOp<CPUDevice, type>);
+// Register the CPU kernel for each type TF_CALL_NUMBER_TYPES enumerates.
+TF_CALL_NUMBER_TYPES(REGISTER_KERNEL);
+#undef REGISTER_KERNEL  // The helper macro is not needed past this point.
+
+#if GOOGLE_CUDA
+// Forward-declare the GPU functor specializations; not defined in this file.
+namespace functor {
+#define DECLARE_GPU_SPEC(T, Dims)                                      \
+  template <>                                                          \
+  void Bias<GPUDevice, T, Dims>::operator()(                           \
+      const GPUDevice& d, typename TTypes<T, Dims>::ConstTensor input, \
+      typename TTypes<T>::ConstVec bias,                               \
+      typename TTypes<T, Dims>::Tensor output);                        \
+  extern template struct Bias<GPUDevice, T, Dims>;
+// Declares the rank 2-5 GPU specializations for a single element type T.
+#define DECLARE_GPU_SPECS(T) \
+  DECLARE_GPU_SPEC(T, 2);    \
+  DECLARE_GPU_SPEC(T, 3);    \
+  DECLARE_GPU_SPEC(T, 4);    \
+  DECLARE_GPU_SPEC(T, 5);
+// Expand the declarations for each type TF_CALL_GPU_NUMBER_TYPES enumerates.
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);
+}  // namespace functor
+
+// Register BiasOp<GPUDevice, T> as the GPU "BiasAdd" kernel for those types.
+#define REGISTER_GPU_KERNEL(type)                                   \
+  REGISTER_KERNEL_BUILDER(                                          \
+      Name("BiasAdd").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
+      BiasOp<GPUDevice, type>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
+// NOTE(review): unlike REGISTER_KERNEL, the GPU macros are never #undef'd.
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow