| field | value | date |
|---|---|---|
| author | A. Unique TensorFlower <gardener@tensorflow.org> | 2016-07-13 14:55:37 -0800 |
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-07-13 16:05:23 -0700 |
| commit | a41d3e697cddbe7f14343b5a0047d62273b6cf3e (patch) | |
| tree | 82b1e149a6ce24c7299f6dbacbc26f0ba33f05f0 /tensorflow/contrib/quantization | |
| parent | 4a1376d8e30c04b23cf5c1e7b91931b1a06e54c0 (diff) | |
Move computation in quantized_bias_add_op.cc to quantization_utils.h.
Change: 127370125
Diffstat (limited to 'tensorflow/contrib/quantization')
| file | changes |
|---|---|
| tensorflow/contrib/quantization/kernels/BUILD | +3 |
| tensorflow/contrib/quantization/kernels/quantization_utils.cc | +42 (new) |
| tensorflow/contrib/quantization/kernels/quantization_utils.h | +104 |
| tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc | +5, -76 |

4 files changed, 154 insertions, 76 deletions
```diff
diff --git a/tensorflow/contrib/quantization/kernels/BUILD b/tensorflow/contrib/quantization/kernels/BUILD
index 8c162a5ab5..6621c450fc 100644
--- a/tensorflow/contrib/quantization/kernels/BUILD
+++ b/tensorflow/contrib/quantization/kernels/BUILD
@@ -19,6 +19,7 @@ filegroup(
     name = "android_ops",
     srcs = [
         "dequantize_op.cc",
+        "quantization_utils.cc",
         "quantization_utils.h",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
@@ -50,6 +51,7 @@ tf_kernel_library(
     name = "quantized_ops",
     srcs = [
         "dequantize_op.cc",
+        "quantization_utils.cc",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
         "quantized_activation_ops.cc",
@@ -85,6 +87,7 @@ tf_custom_op_library(
     name = "_quantized_kernels.so",
     srcs = [
         "dequantize_op.cc",
+        "quantization_utils.cc",
         "quantization_utils.h",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.cc b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
new file mode 100644
index 0000000000..72651f96b0
--- /dev/null
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
@@ -0,0 +1,42 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
+
+namespace tensorflow {
+
+void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max,
+                                       float smaller_input_min,
+                                       float smaller_input_max,
+                                       float* output_min, float* output_max) {
+  // We need to have a good range to add our two arguments together in. This
+  // is surprisingly tricky, since it has to satisfy a few different needs:
+  //  - Must be symmetrical around zero, so that 0 + 0 = 0.
+  //  - Must hold the largest of the argument ranges.
+  //  - Should have enough range that the bits of the lowest and highest
+  //    arguments overlap if possible without the lower getting truncated.
+  //  - Should have some headroom so that there's no overflow.
+  //  - Needs to be signed.
+  // This leads us to use a scheme where we (assuming the inputs are eight bit
+  // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the
+  // accumulated results. This gives us all the properties we need.
+  *output_max =
+      std::max(input_max, std::max(-input_min, std::max(smaller_input_max,
+                                                        -smaller_input_min))) *
+      (1 << 17);
+  *output_min = -(*output_max);
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h
index 1bff8b194c..521187c5a2 100644
--- a/tensorflow/contrib/quantization/kernels/quantization_utils.h
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h
@@ -381,6 +381,110 @@ Tensor QuantizedTensorToFloat(const Tensor& input, float min, float max) {
   return result;
 }
 
+void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max,
+                                       float smaller_input_min,
+                                       float smaller_input_max,
+                                       float* output_min, float* output_max);
+
+// Add <input> and <smaller_input>. If <smaller_input> has fewer elements than
+// <input>, then it is broadcast onto <input>.
+template <typename T1, typename T2, typename T3>
+void QuantizedAddUsingEigen(const Eigen::ThreadPoolDevice& device,
+                            const Tensor& input, float input_min,
+                            float input_max, const Tensor& smaller_input,
+                            float smaller_input_min, float smaller_input_max,
+                            Tensor* output, float* output_min,
+                            float* output_max) {
+  const auto& input_flat = input.flat<T1>();
+  const auto& smaller_input_flat = smaller_input.flat<T2>();
+  auto output_flat = output->flat<T3>();
+
+  GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min,
+                                    smaller_input_max, output_min, output_max);
+  // To do addition properly, we need to compensate for a possibly unbalanced
+  // zero point in the total representation. The quantized value that
+  // represents the real number zero needs to be subtracted before addition to
+  // make sure that the identity of zero + zero = zero holds.
+  const T3 zero_in_total_space =
+      FloatToQuantized<T3>(0.0f, *output_min, *output_max);
+
+  const int64 input_element_count = input.NumElements();
+  const int64 smaller_input_element_count = smaller_input.NumElements();
+
+  QuantizedToFloatStruct<T1> smaller_input_q2f(smaller_input_min,
+                                               smaller_input_max);
+  QuantizedToFloatStruct<T2> input_q2f(input_min, input_max);
+  FloatToQuantizedStruct<T3> f2q(*output_min, *output_max);
+
+  auto smaller_input_float =
+      DEQUANTIZE_WITH_EIGEN(smaller_input_flat, smaller_input_q2f);
+  auto smaller_input_in_total_space =
+      QUANTIZE_WITH_EIGEN(smaller_input_float, f2q, T3);
+
+  auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
+  auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
+
+  Eigen::array<Eigen::DenseIndex, 1> bcast;
+  bcast[0] = input_element_count / smaller_input_element_count;
+  output_flat.device(device) =
+      input_in_total_space +
+      (smaller_input_in_total_space.broadcast(bcast) + zero_in_total_space);
+}
+
+// This is a reference implementation of the bias addition for quantized
+// buffers, designed to provide a clear specification for the result we
+// want. We'll want to specialize this for particular hardware, and
+// probably even fuse it with matrix multiplications in a lot of cases. It's
+// important to show the clamping behavior we want in particular.
+template <typename T1, typename T2, typename T3>
+void QuantizedAdd(const Eigen::ThreadPoolDevice& device, const Tensor& input,
+                  float input_min, float input_max, const Tensor& smaller_input,
+                  float smaller_input_min, float smaller_input_max,
+                  Tensor* output, float* output_min, float* output_max) {
+  const auto& input_flat = input.flat<T1>();
+  const auto& smaller_input_flat = smaller_input.flat<T2>();
+  auto output_flat = output->flat<T3>();
+
+  GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min,
+                                    smaller_input_max, output_min, output_max);
+  // To do addition properly, we need to compensate for a possibly unbalanced
+  // zero point in the total representation. The quantized value that
+  // represents the real number zero needs to be subtracted before addition to
+  // make sure that the identity of zero + zero = zero holds.
+  const T3 zero_in_total_space =
+      FloatToQuantized<T3>(0.0f, *output_min, *output_max);
+
+  const int64 input_element_count = input.NumElements();
+  const int64 smaller_input_element_count = smaller_input.NumElements();
+
+  float total_min = *output_min;
+  float total_max = *output_max;
+  const size_t how_many_iterations =
+      (input_element_count / smaller_input_element_count);
+  for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) {
+    const size_t offset = iteration * smaller_input_element_count;
+    for (int c = 0; c < smaller_input_element_count; ++c) {
+      const int index = (offset + c);
+      // The two numbers we're going to add can each be in very different
+      // ranges (e.g. the quantized value '127' may represent very different
+      // real numbers in both) so we need to convert them to a common range
+      // before we sum them.
+      const T1 input_value = input_flat(index);
+      const T3 input_in_total_space = RequantizeInNewRange<T1, T3>(
+          input_value, input_min, input_max, total_min, total_max);
+      const T2 smaller_input_value = smaller_input_flat(c);
+      const T3 smaller_input_in_total_space =
+          RequantizeInNewRange<T2, T3>(smaller_input_value, smaller_input_min,
+                                       smaller_input_max, total_min, total_max);
+      const T3 total_pre = input_in_total_space + smaller_input_in_total_space;
+      // As noted above, we need to compensate for the offset of the actual
+      // zero point in the space we're operating in.
+      const T3 total = total_pre + zero_in_total_space;
+      output_flat(index) = total;
+    }
+  }
+}
+
 }  // namespace tensorflow
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
diff --git a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
index 2531e1b443..c319eb97da 100644
--- a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
@@ -57,83 +57,12 @@ class QuantizedBiasAddOp : public OpKernel {
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
-    const auto& input_flat = input.flat<T1>();
-    const auto& bias_flat = bias.flat<T2>();
-    auto output_flat = output->flat<T3>();
-
-    // We need to have a good range to add our two arguments together in. This
-    // is surprisingly tricky, since it has to satisfy a few different needs:
-    //  - Must be symmetrical around zero, so that 0 + 0 = 0.
-    //  - Must hold the largest of the argument ranges.
-    //  - Should have enough range that the bits of the lowest and highest
-    //    arguments overlap if possible without the lower getting truncated.
-    //  - Should have some headroom so that there's no overflow.
-    //  - Needs to be signed.
-    // This leads us to use a scheme where we (assuming the inputs are eight bit
-    // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the
-    // accumulated results. This gives us all the properties we need.
-    const float total_max =
-        std::max(input_max,
-                 std::max(-input_min, std::max(bias_max, -bias_min))) *
-        (1 << 17);
-    const float total_min = -total_max;
-
-    // To do addition properly, we need to compensate for a possibly unbalanced
-    // zero point in the total representation. The quantized value that
-    // represents the real number zero needs to be subtracted before addition to
-    // make sure that the identity of zero + zero = zero holds.
-    const T3 zero_in_total_space =
-        FloatToQuantized<T3>(0.0f, total_min, total_max);
-
-    const int64 input_element_count = input.NumElements();
-    const int64 bias_element_count = bias.NumElements();
-
-    QuantizedToFloatStruct<T1> bias_q2f(bias_min, bias_max);
-    QuantizedToFloatStruct<T2> input_q2f(input_min, input_max);
-    FloatToQuantizedStruct<T3> f2q(total_min, total_max);
-
-    auto bias_float = DEQUANTIZE_WITH_EIGEN(bias_flat, bias_q2f);
-    auto bias_in_total_space = QUANTIZE_WITH_EIGEN(bias_float, f2q, T3);
-
-    auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
-    auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
-
-    Eigen::array<Eigen::DenseIndex, 1> bcast;
-    bcast[0] = input_element_count / bias_element_count;
-    output_flat.device(context->template eigen_device<CPUDevice>()) =
-        input_in_total_space +
-        (bias_in_total_space.broadcast(bcast) + zero_in_total_space);
-
-#if 0
-    const size_t how_many_iterations =
-        (input_element_count / bias_element_count);
-    // This is a reference implementation of the bias addition for quantized
-    // buffers, designed to provide a clear specification for the result we
-    // want. We'll want to specialize this for particular hardware, and
-    // probably even fuse it with matrix multiplications in a lot of cases. It's
-    // important to show the clamping behavior we want in particular.
-    for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) {
-      const size_t offset = iteration * bias_element_count;
-      for (int c = 0; c < bias_element_count; ++c) {
-        const int index = (offset + c);
-        // The two numbers we're going to add can each be in very different
-        // ranges (e.g. the quantized value '127' may represent very different
-        // real numbers in both) so we need to convert them to a common range
-        // before we sum them.
-        const T1 input_value = input_flat(index);
-        const T3 input_in_total_space = RequantizeInNewRange<T1, T3>(
-            input_value, input_min, input_max, total_min, total_max);
-        const T2 bias_value = bias_flat(c);
-        const T3 bias_in_total_space = RequantizeInNewRange<T2, T3>(
-            bias_value, bias_min, bias_max, total_min, total_max);
-        const T3 total_pre = input_in_total_space + bias_in_total_space;
-        // As noted above, we need to compensate for the offset of the actual
-        // zero point in the space we're operating in.
-        const T3 total = total_pre + zero_in_total_space;
-        output_flat(index) = total;
-      }
-    }
-#endif
+    float total_min;
+    float total_max;
+    QuantizedAddUsingEigen<T1, T2, T3>(
+        context->template eigen_device<CPUDevice>(), input, input_min,
+        input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
 
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
```
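A note on the range rule in `GetOutputMinAndMaxForQuantizedAdd`: the sketch below is a standalone restatement of that arithmetic (not the TensorFlow code; the function name and input ranges are made up for illustration). It shows how a pair of hypothetical argument ranges maps to the symmetric output range scaled by 2^17 that the comment describes.

```cpp
#include <algorithm>
#include <cstdio>

// Standalone restatement of the range rule: take the largest magnitude across
// both argument ranges, scale it by 2^17, and make the result symmetric.
// With eight-bit inputs and a 32-bit output, a requantized argument then
// lands in the bottom 32 - 17 = 15 bits, leaving headroom for the addition.
void OutputRangeForAdd(float input_min, float input_max,
                       float smaller_input_min, float smaller_input_max,
                       float* output_min, float* output_max) {
  const float largest_magnitude =
      std::max(std::max(input_max, -input_min),
               std::max(smaller_input_max, -smaller_input_min));
  *output_max = largest_magnitude * (1 << 17);
  *output_min = -(*output_max);
}

int main() {
  float out_min = 0.0f, out_max = 0.0f;
  // Hypothetical ranges: an activation in [-1, 1] and a bias in [-0.5, 0.5].
  OutputRangeForAdd(-1.0f, 1.0f, -0.5f, 0.5f, &out_min, &out_max);
  std::printf("[%g, %g]\n", out_min, out_max);  // [-131072, 131072]
  return 0;
}
```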
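The zero-point comment repeated in `QuantizedAddUsingEigen` and `QuantizedAdd` follows from the general affine-quantization identity: if q = q_zero + r / scale, then q_a + q_b carries one zero-point offset too many, so the correctly quantized sum is q_a + q_b - q_zero. A minimal sketch with a made-up scale and zero point (this is the generic identity, not the exact `FloatToQuantized` convention, whose offset for the symmetric range above is close to zero):

```cpp
#include <cstdio>

// Generic affine-quantization identity behind the zero-point compensation:
//   q = q_zero + r / scale  =>  q_a + q_b = 2 * q_zero + (r_a + r_b) / scale,
// so one zero-point offset must be removed for zero + zero = zero to hold.
int main() {
  const float scale = 0.25f;  // hypothetical step size (exactly representable)
  const int q_zero = 10;      // hypothetical zero point: r = 0 maps to q = 10
  const int q_a = q_zero + static_cast<int>(1.5f / scale);    // r_a = 1.5   -> 16
  const int q_b = q_zero + static_cast<int>(-0.25f / scale);  // r_b = -0.25 -> 9
  const int q_sum = q_a + q_b - q_zero;                       // 15
  const float r_sum = (q_sum - q_zero) * scale;               // (15-10)*0.25
  std::printf("q_sum = %d represents %g\n", q_sum, r_sum);    // 1.25 = 1.5 - 0.25
  return 0;
}
```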
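Both implementations broadcast the smaller input over the larger one with the same indexing: `bcast[0] = input_element_count / smaller_input_element_count` in the Eigen path, and the two nested loops in the reference path. A plain-float sketch of just that tiling, with hypothetical data and the quantization omitted:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical data: a 2x3 "tensor" stored flat, plus a 3-element bias.
  const std::vector<float> input = {1, 2, 3, 4, 5, 6};
  const std::vector<float> bias = {10, 20, 30};
  std::vector<float> output(input.size());

  // The smaller input is tiled input.size() / bias.size() times, mirroring
  // the nested loops of QuantizedAdd and the bcast array of
  // QuantizedAddUsingEigen.
  const std::size_t how_many_iterations = input.size() / bias.size();
  for (std::size_t iteration = 0; iteration < how_many_iterations;
       ++iteration) {
    const std::size_t offset = iteration * bias.size();
    for (std::size_t c = 0; c < bias.size(); ++c) {
      output[offset + c] = input[offset + c] + bias[c];
    }
  }
  for (float v : output) std::printf("%g ", v);  // 11 22 33 14 25 36
  std::printf("\n");
  return 0;
}
```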