author     A. Unique TensorFlower <gardener@tensorflow.org>  2016-07-13 14:55:37 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>   2016-07-13 16:05:23 -0700
commit     a41d3e697cddbe7f14343b5a0047d62273b6cf3e (patch)
tree       82b1e149a6ce24c7299f6dbacbc26f0ba33f05f0 /tensorflow/contrib/quantization
parent     4a1376d8e30c04b23cf5c1e7b91931b1a06e54c0 (diff)
Move computation in quantized_bias_add_op.cc to quantization_utils.h.
Change: 127370125
Diffstat (limited to 'tensorflow/contrib/quantization')
-rw-r--r--  tensorflow/contrib/quantization/kernels/BUILD                     |   3
-rw-r--r--  tensorflow/contrib/quantization/kernels/quantization_utils.cc     |  42
-rw-r--r--  tensorflow/contrib/quantization/kernels/quantization_utils.h      | 104
-rw-r--r--  tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc  |  81
4 files changed, 154 insertions(+), 76 deletions(-)
diff --git a/tensorflow/contrib/quantization/kernels/BUILD b/tensorflow/contrib/quantization/kernels/BUILD
index 8c162a5ab5..6621c450fc 100644
--- a/tensorflow/contrib/quantization/kernels/BUILD
+++ b/tensorflow/contrib/quantization/kernels/BUILD
@@ -19,6 +19,7 @@ filegroup(
name = "android_ops",
srcs = [
"dequantize_op.cc",
+ "quantization_utils.cc",
"quantization_utils.h",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
@@ -50,6 +51,7 @@ tf_kernel_library(
name = "quantized_ops",
srcs = [
"dequantize_op.cc",
+ "quantization_utils.cc",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
"quantized_activation_ops.cc",
@@ -85,6 +87,7 @@ tf_custom_op_library(
name = "_quantized_kernels.so",
srcs = [
"dequantize_op.cc",
+ "quantization_utils.cc",
"quantization_utils.h",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.cc b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
new file mode 100644
index 0000000000..72651f96b0
--- /dev/null
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
@@ -0,0 +1,42 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
+
+namespace tensorflow {
+
+void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max,
+ float smaller_input_min,
+ float smaller_input_max,
+ float* output_min, float* output_max) {
+ // We need to have a good range to add our two arguments together in. This
+ // is surprisingly tricky, since it has to satisfy a few different needs:
+ // - Must be symmetrical around zero, so that 0 + 0 = 0.
+ // - Must hold the largest of the argument ranges.
+ // - Should have enough range that the bits of the lowest and highest
+ // arguments overlap if possible without the lower getting truncated.
+ // - Should have some headroom so that there's no overflow.
+ // - Needs to be signed.
+ // This leads us to use a scheme where we (assuming the inputs are eight bit
+ // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the
+ // accumulated results. This gives us all the properties we need.
+ *output_max =
+ std::max(input_max, std::max(-input_min, std::max(smaller_input_max,
+ -smaller_input_min))) *
+ (1 << 17);
+ *output_min = -(*output_max);
+}
+
+} // namespace tensorflow
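
[Editor's note: the range chosen by the new helper can be reproduced with a small standalone sketch (plain C++, no TensorFlow dependencies; the function name and the input/bias ranges below are hypothetical, not part of the commit). The output range is the largest absolute bound of either argument scaled by 2^17, mirrored around zero.]

#include <algorithm>
#include <cstdio>

// Standalone mirror of GetOutputMinAndMaxForQuantizedAdd: take the widest
// absolute bound of either argument and scale it by 2^17, so 8-bit inputs
// land in the low bits of a signed 32-bit accumulator with headroom.
void OutputRangeForQuantizedAdd(float input_min, float input_max,
                                float smaller_input_min,
                                float smaller_input_max,
                                float* output_min, float* output_max) {
  *output_max = std::max({input_max, -input_min, smaller_input_max,
                          -smaller_input_min}) *
                (1 << 17);
  *output_min = -(*output_max);
}

int main() {
  float out_min = 0.0f, out_max = 0.0f;
  // Hypothetical ranges: activations in [-1, 1], bias in [-0.5, 0.5].
  OutputRangeForQuantizedAdd(-1.0f, 1.0f, -0.5f, 0.5f, &out_min, &out_max);
  std::printf("output range: [%.1f, %.1f]\n", out_min, out_max);
  // Prints: output range: [-131072.0, 131072.0]
  return 0;
}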
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h
index 1bff8b194c..521187c5a2 100644
--- a/tensorflow/contrib/quantization/kernels/quantization_utils.h
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h
@@ -381,6 +381,110 @@ Tensor QuantizedTensorToFloat(const Tensor& input, float min, float max) {
return result;
}
+void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max,
+ float smaller_input_min,
+ float smaller_input_max,
+ float* output_min, float* output_max);
+
+// Add <input> and <smaller_input>. If <smaller_input> has fewer elements than
+// <input>, then it is broadcast onto <input>.
+template <typename T1, typename T2, typename T3>
+void QuantizedAddUsingEigen(const Eigen::ThreadPoolDevice& device,
+ const Tensor& input, float input_min,
+ float input_max, const Tensor& smaller_input,
+ float smaller_input_min, float smaller_input_max,
+ Tensor* output, float* output_min,
+ float* output_max) {
+ const auto& input_flat = input.flat<T1>();
+ const auto& smaller_input_flat = smaller_input.flat<T2>();
+ auto output_flat = output->flat<T3>();
+
+ GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min,
+ smaller_input_max, output_min, output_max);
+ // To do addition properly, we need to compensate for a possibly unbalanced
+ // zero point in the total representation. The quantized value that
+ // represents the real number zero needs to be subtracted before addition to
+ // make sure that the identity of zero + zero = zero holds.
+ const T3 zero_in_total_space =
+ FloatToQuantized<T3>(0.0f, *output_min, *output_max);
+
+ const int64 input_element_count = input.NumElements();
+ const int64 smaller_input_element_count = smaller_input.NumElements();
+
+ QuantizedToFloatStruct<T1> smaller_input_q2f(smaller_input_min,
+ smaller_input_max);
+ QuantizedToFloatStruct<T2> input_q2f(input_min, input_max);
+ FloatToQuantizedStruct<T3> f2q(*output_min, *output_max);
+
+ auto smaller_input_float =
+ DEQUANTIZE_WITH_EIGEN(smaller_input_flat, smaller_input_q2f);
+ auto smaller_input_in_total_space =
+ QUANTIZE_WITH_EIGEN(smaller_input_float, f2q, T3);
+
+ auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
+ auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
+
+ Eigen::array<Eigen::DenseIndex, 1> bcast;
+ bcast[0] = input_element_count / smaller_input_element_count;
+ output_flat.device(device) =
+ input_in_total_space +
+ (smaller_input_in_total_space.broadcast(bcast) + zero_in_total_space);
+}
+
+// This is a reference implementation of the bias addition for quantized
+// buffers, designed to provide a clear specification for the result we
+// want. We'll want to specialize this for particular hardware, and
+// probably even fuse it with matrix multiplications in a lot of cases. It's
+// important to show the clamping behavior we want in particular.
+template <typename T1, typename T2, typename T3>
+void QuantizedAdd(const Eigen::ThreadPoolDevice& device, const Tensor& input,
+ float input_min, float input_max, const Tensor& smaller_input,
+ float smaller_input_min, float smaller_input_max,
+ Tensor* output, float* output_min, float* output_max) {
+ const auto& input_flat = input.flat<T1>();
+ const auto& smaller_input_flat = smaller_input.flat<T2>();
+ auto output_flat = output->flat<T3>();
+
+ GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min,
+ smaller_input_max, output_min, output_max);
+ // To do addition properly, we need to compensate for a possibly unbalanced
+ // zero point in the total representation. The quantized value that
+ // represents the real number zero needs to be subtracted before addition to
+ // make sure that the identity of zero + zero = zero holds.
+ const T3 zero_in_total_space =
+ FloatToQuantized<T3>(0.0f, *output_min, *output_max);
+
+ const int64 input_element_count = input.NumElements();
+ const int64 smaller_input_element_count = smaller_input.NumElements();
+
+ float total_min = *output_min;
+ float total_max = *output_max;
+ const size_t how_many_iterations =
+ (input_element_count / smaller_input_element_count);
+ for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) {
+ const size_t offset = iteration * smaller_input_element_count;
+ for (int c = 0; c < smaller_input_element_count; ++c) {
+ const int index = (offset + c);
+ // The two numbers we're going to add can each be in very different
+ // ranges (e.g. the quantized value '127' may represent very different
+ // real numbers in both) so we need to convert them to a common range
+ // before we sum them.
+ const T1 input_value = input_flat(index);
+ const T3 input_in_total_space = RequantizeInNewRange<T1, T3>(
+ input_value, input_min, input_max, total_min, total_max);
+ const T2 smaller_input_value = smaller_input_flat(c);
+ const T3 smaller_input_in_total_space =
+ RequantizeInNewRange<T2, T3>(smaller_input_value, smaller_input_min,
+ smaller_input_max, total_min, total_max);
+ const T3 total_pre = input_in_total_space + smaller_input_in_total_space;
+ // As noted above, we need to compensate for the offset of the actual
+ // zero point in the space we're operating in.
+ const T3 total = total_pre + zero_in_total_space;
+ output_flat(index) = total;
+ }
+ }
+}
+
} // namespace tensorflow
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
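
[Editor's note: the core idea in both QuantizedAddUsingEigen and the reference loop above is that the two operands must be requantized into the shared output range before their codes can be summed, because the same 8-bit code can stand for very different real values in each input's range. Below is a minimal standalone sketch of that step (plain C++, hypothetical helper names and ranges, not the TensorFlow helpers). It uses a symmetric signed quantizer, so the zero_in_total_space correction in the real code is not needed here.]

#include <cmath>
#include <cstdint>
#include <cstdio>

// Real value represented by an unsigned 8-bit code over [min, max].
float DequantizeU8(std::uint8_t code, float min, float max) {
  return min + (code / 255.0f) * (max - min);
}

// Signed 32-bit code for a real value over a symmetric range [-range, range].
std::int32_t QuantizeS32(float value, float range) {
  const double scale = 2147483647.0 / range;  // map +range to INT32_MAX
  return static_cast<std::int32_t>(std::lround(value * scale));
}

int main() {
  // Hypothetical ranges: input codes cover [-1, 1], bias codes cover [-0.5, 0.5].
  const float input_min = -1.0f, input_max = 1.0f;
  const float bias_min = -0.5f, bias_max = 0.5f;
  // Shared output range, as chosen by GetOutputMinAndMaxForQuantizedAdd.
  const float total_range = 1.0f * (1 << 17);

  const std::uint8_t input_code = 191;  // ~0.5 in [-1, 1]
  const std::uint8_t bias_code = 191;   // ~0.25 in [-0.5, 0.5]

  // Identical 8-bit codes, very different real values: requantize both into
  // the common 32-bit output range, then add the codes directly.
  const std::int32_t a =
      QuantizeS32(DequantizeU8(input_code, input_min, input_max), total_range);
  const std::int32_t b =
      QuantizeS32(DequantizeU8(bias_code, bias_min, bias_max), total_range);
  const std::int32_t sum = a + b;

  std::printf("sum code %d represents %.3f\n", static_cast<int>(sum),
              sum * (total_range / 2147483647.0));  // roughly 0.747
  return 0;
}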
diff --git a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
index 2531e1b443..c319eb97da 100644
--- a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
@@ -57,83 +57,12 @@ class QuantizedBiasAddOp : public OpKernel {
Tensor* output = nullptr;
OP_REQUIRES_OK(context,
context->allocate_output(0, input.shape(), &output));
- const auto& input_flat = input.flat<T1>();
- const auto& bias_flat = bias.flat<T2>();
- auto output_flat = output->flat<T3>();
- // We need to have a good range to add our two arguments together in. This
- // is surprisingly tricky, since it has to satisfy a few different needs:
- // - Must be symmetrical around zero, so that 0 + 0 = 0.
- // - Must hold the largest of the argument ranges.
- // - Should have enough range that the bits of the lowest and highest
- // arguments overlap if possible without the lower getting truncated.
- // - Should have some headroom so that there's no overflow.
- // - Needs to be signed.
- // This leads us to use a scheme where we (assuming the inputs are eight bit
- // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the
- // accumulated results. This gives us all the properties we need.
- const float total_max =
- std::max(input_max,
- std::max(-input_min, std::max(bias_max, -bias_min))) *
- (1 << 17);
- const float total_min = -total_max;
-
- // To do addition properly, we need to compensate for a possibly unbalanced
- // zero point in the total representation. The quantized value that
- // represents the real number zero needs to be subtracted before addition to
- // make sure that the identity of zero + zero = zero holds.
- const T3 zero_in_total_space =
- FloatToQuantized<T3>(0.0f, total_min, total_max);
-
- const int64 input_element_count = input.NumElements();
- const int64 bias_element_count = bias.NumElements();
-
- QuantizedToFloatStruct<T1> bias_q2f(bias_min, bias_max);
- QuantizedToFloatStruct<T2> input_q2f(input_min, input_max);
- FloatToQuantizedStruct<T3> f2q(total_min, total_max);
-
- auto bias_float = DEQUANTIZE_WITH_EIGEN(bias_flat, bias_q2f);
- auto bias_in_total_space = QUANTIZE_WITH_EIGEN(bias_float, f2q, T3);
-
- auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
- auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
-
- Eigen::array<Eigen::DenseIndex, 1> bcast;
- bcast[0] = input_element_count / bias_element_count;
- output_flat.device(context->template eigen_device<CPUDevice>()) =
- input_in_total_space +
- (bias_in_total_space.broadcast(bcast) + zero_in_total_space);
-
-#if 0
- const size_t how_many_iterations =
- (input_element_count / bias_element_count);
- // This is a reference implementation of the bias addition for quantized
- // buffers, designed to provide a clear specification for the result we
- // want. We'll want to specialize this for particular hardware, and
- // probably even fuse it with matrix multiplications in a lot of cases. It's
- // important to show the clamping behavior we want in particular.
- for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) {
- const size_t offset = iteration * bias_element_count;
- for (int c = 0; c < bias_element_count; ++c) {
- const int index = (offset + c);
- // The two numbers we're going to add can each be in very different
- // ranges (e.g. the quantized value '127' may represent very different
- // real numbers in both) so we need to convert them to a common range
- // before we sum them.
- const T1 input_value = input_flat(index);
- const T3 input_in_total_space = RequantizeInNewRange<T1, T3>(
- input_value, input_min, input_max, total_min, total_max);
- const T2 bias_value = bias_flat(c);
- const T3 bias_in_total_space = RequantizeInNewRange<T2, T3>(
- bias_value, bias_min, bias_max, total_min, total_max);
- const T3 total_pre = input_in_total_space + bias_in_total_space;
- // As noted above, we need to compensate for the offset of the actual
- // zero point in the space we're operating in.
- const T3 total = total_pre + zero_in_total_space;
- output_flat(index) = total;
- }
- }
-#endif
+ float total_min;
+ float total_max;
+ QuantizedAddUsingEigen<T1, T2, T3>(
+ context->template eigen_device<CPUDevice>(), input, input_min,
+ input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
Tensor* output_min = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));