path: root/tensorflow/contrib/quantization
author    A. Unique TensorFlower <gardener@tensorflow.org>  2016-07-13 07:30:32 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2016-07-13 08:32:39 -0700
commit    a4e4cc47acd41a729e23bd0839f625534d77b870 (patch)
tree      36ad0c73ba49c3e8434647a9797c8f9e3003d2b8 /tensorflow/contrib/quantization
parent    19cf789efab823678ffbd2d0ab98d03c3940cc7a (diff)
Change QuantizedBiasAdd op to use Eigen floating-point computation
instead of the reference computation.
Change: 127319811
Diffstat (limited to 'tensorflow/contrib/quantization')
-rw-r--r-- tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc | 29
1 file changed, 25 insertions, 4 deletions
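The patch replaces the scalar reference loop with a single vectorized Eigen expression: both operands are dequantized from their own ranges to float, requantized into the shared (total_min, total_max) range, and added there, with the bias broadcast across the input. Below is a rough scalar model of that per-element arithmetic, assuming the common quint8-in, qint32-out instantiation and a simplified affine quantization scheme; the helper names are hypothetical, and the real FloatToQuantized/QuantizedToFloat helpers handle rounding and range adjustment more carefully.

#include <cmath>
#include <cstdint>

// Hypothetical scalar model of QuantizedBiasAdd's per-element arithmetic;
// not the actual TensorFlow helpers.
static float DequantizeU8(uint8_t q, float min, float max) {
  // 255 steps span [min, max] for an 8-bit operand.
  return min + q * ((max - min) / 255.0f);
}

static int32_t QuantizeI32(float f, float min, float max) {
  // Map [min, max] onto the full int32 span.
  const double scale = 4294967295.0 / (static_cast<double>(max) - min);
  return static_cast<int32_t>(std::round((f - min) * scale) + INT32_MIN);
}

// Requantize both operands into the shared output range and add them.
// zero_in_total_space compensates for the offset that plain integer
// addition introduces; assuming the shared range is chosen symmetric
// around zero (one of the needs the kernel's "good range" comment refers
// to), the quantized zero lands at or near 0, keeping 0 + 0 = 0 exact.
static int32_t BiasAddElement(uint8_t input, float input_min, float input_max,
                              uint8_t bias, float bias_min, float bias_max,
                              float total_min, float total_max) {
  const int32_t input_q = QuantizeI32(
      DequantizeU8(input, input_min, input_max), total_min, total_max);
  const int32_t bias_q = QuantizeI32(
      DequantizeU8(bias, bias_min, bias_max), total_min, total_max);
  const int32_t zero_q = QuantizeI32(0.0f, total_min, total_max);
  return input_q + (bias_q + zero_q);
}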
diff --git a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
index 07c72b716d..2531e1b443 100644
--- a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
@@ -24,6 +24,8 @@ limitations under the License.
namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
template <class T1, class T2, class T3>
class QuantizedBiasAddOp : public OpKernel {
public:
@@ -56,12 +58,8 @@ class QuantizedBiasAddOp : public OpKernel {
OP_REQUIRES_OK(context,
context->allocate_output(0, input.shape(), &output));
const auto& input_flat = input.flat<T1>();
- const int64 input_element_count = input.NumElements();
const auto& bias_flat = bias.flat<T2>();
- const int64 bias_element_count = bias.NumElements();
auto output_flat = output->flat<T3>();
- const size_t how_many_iterations =
- (input_element_count / bias_element_count);
// We need to have a good range to add our two arguments together in. This
// is surprisingly tricky, since it has to satisfy a few different needs:
@@ -87,6 +85,28 @@ class QuantizedBiasAddOp : public OpKernel {
const T3 zero_in_total_space =
FloatToQuantized<T3>(0.0f, total_min, total_max);
+ const int64 input_element_count = input.NumElements();
+ const int64 bias_element_count = bias.NumElements();
+
+ QuantizedToFloatStruct<T2> bias_q2f(bias_min, bias_max);
+ QuantizedToFloatStruct<T1> input_q2f(input_min, input_max);
+ FloatToQuantizedStruct<T3> f2q(total_min, total_max);
+
+ auto bias_float = DEQUANTIZE_WITH_EIGEN(bias_flat, bias_q2f);
+ auto bias_in_total_space = QUANTIZE_WITH_EIGEN(bias_float, f2q, T3);
+
+ auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
+ auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
+
+ Eigen::array<Eigen::DenseIndex, 1> bcast;
+ bcast[0] = input_element_count / bias_element_count;
+ output_flat.device(context->template eigen_device<CPUDevice>()) =
+ input_in_total_space +
+ (bias_in_total_space.broadcast(bcast) + zero_in_total_space);
+
+#if 0
+ const size_t how_many_iterations =
+ (input_element_count / bias_element_count);
// This is a reference implementation of the bias addition for quantized
// buffers, designed to provide a clear specification for the result we
// want. We'll want to specialize this for particular hardware, and
@@ -113,6 +133,7 @@ class QuantizedBiasAddOp : public OpKernel {
output_flat(index) = total;
}
}
+#endif
Tensor* output_min = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
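For reference, the broadcast at the heart of the new code path (the flat bias tiled input_element_count / bias_element_count times, then added elementwise to the flat input) can be exercised standalone with Eigen::Tensor. This sketch uses plain int values rather than quantized types and assigns without a device, but the bcast mechanics match the patch.

#include <iostream>
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // A flat "input" of 3 rows x 4 channels and a 4-element "bias",
  // mirroring how the op views its tensors through flat<T>().
  Eigen::Tensor<int, 1> input(12);
  input.setConstant(100);
  Eigen::Tensor<int, 1> bias(4);
  bias.setValues({1, 2, 3, 4});

  // bcast[0] = input_element_count / bias_element_count, as in the patch;
  // broadcast() tiles the rank-1 bias out to the input's length.
  Eigen::array<Eigen::DenseIndex, 1> bcast;
  bcast[0] = input.size() / bias.size();

  Eigen::Tensor<int, 1> output(12);
  output = input + bias.broadcast(bcast);
  std::cout << output << std::endl;  // 101 102 103 104, repeated three times
  return 0;
}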