Change QuantizedBiasAdd op to use Eigen floating-point computation

instead of reference computation. Change: 127319811
author: A. Unique TensorFlower <gardener@tensorflow.org> 2016-07-13 07:30:32 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-07-13 08:32:39 -0700
commit: a4e4cc47acd41a729e23bd0839f625534d77b870 (patch)
tree: 36ad0c73ba49c3e8434647a9797c8f9e3003d2b8 /tensorflow/contrib/quantization
parent: 19cf789efab823678ffbd2d0ab98d03c3940cc7a (diff)
1 files changed, 25 insertions, 4 deletions
diff --git a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
index 07c72b716d..2531e1b443 100644
--- a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
@@ -24,6 +24,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
 template <class T1, class T2, class T3>
 class QuantizedBiasAddOp : public OpKernel {
  public:
@@ -56,12 +58,8 @@ class QuantizedBiasAddOp : public OpKernel {
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
     const auto& input_flat = input.flat<T1>();
-    const int64 input_element_count = input.NumElements();
     const auto& bias_flat = bias.flat<T2>();
-    const int64 bias_element_count = bias.NumElements();
     auto output_flat = output->flat<T3>();
-    const size_t how_many_iterations =
-        (input_element_count / bias_element_count);
 
     // We need to have a good range to add our two arguments together in. This
     // is surprisingly tricky, since it has to satisfy a few different needs:
@@ -87,6 +85,28 @@ class QuantizedBiasAddOp : public OpKernel {
     const T3 zero_in_total_space =
         FloatToQuantized<T3>(0.0f, total_min, total_max);
 
+    const int64 input_element_count = input.NumElements();
+    const int64 bias_element_count = bias.NumElements();
+
+    QuantizedToFloatStruct<T1> bias_q2f(bias_min, bias_max);
+    QuantizedToFloatStruct<T2> input_q2f(input_min, input_max);
+    FloatToQuantizedStruct<T3> f2q(total_min, total_max);
+
+    auto bias_float = DEQUANTIZE_WITH_EIGEN(bias_flat, bias_q2f);
+    auto bias_in_total_space = QUANTIZE_WITH_EIGEN(bias_float, f2q, T3);
+
+    auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
+    auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
+
+    Eigen::array<Eigen::DenseIndex, 1> bcast;
+    bcast[0] = input_element_count / bias_element_count;
+    output_flat.device(context->template eigen_device<CPUDevice>()) =
+        input_in_total_space +
+        (bias_in_total_space.broadcast(bcast) + zero_in_total_space);
+
+#if 0
+    const size_t how_many_iterations =
+        (input_element_count / bias_element_count);
     // This is a reference implementation of the bias addition for quantized
     // buffers, designed to provide a clear specification for the result we
     // want. We'll want to specialize this for particular hardware, and
@@ -113,6 +133,7 @@ class QuantizedBiasAddOp : public OpKernel {
         output_flat(index) = total;
       }
     }
+#endif
 
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
author	A. Unique TensorFlower <gardener@tensorflow.org>	2016-07-13 07:30:32 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-07-13 08:32:39 -0700
commit	a4e4cc47acd41a729e23bd0839f625534d77b870 (patch)
tree	36ad0c73ba49c3e8434647a9797c8f9e3003d2b8 /tensorflow/contrib/quantization
parent	19cf789efab823678ffbd2d0ab98d03c3940cc7a (diff)