| field | value | date |
|---|---|---|
| author | A. Unique TensorFlower <gardener@tensorflow.org> | 2016-07-13 14:55:37 -0800 |
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-07-13 16:05:23 -0700 |
| commit | a41d3e697cddbe7f14343b5a0047d62273b6cf3e (patch) | |
| tree | 82b1e149a6ce24c7299f6dbacbc26f0ba33f05f0 /tensorflow/contrib/quantization | |
| parent | 4a1376d8e30c04b23cf5c1e7b91931b1a06e54c0 (diff) | |
Move computation in quantized_bias_add_op.cc to quantization_utils.h.
Change: 127370125
Diffstat (limited to 'tensorflow/contrib/quantization')
| file | changes |
|---|---|
| tensorflow/contrib/quantization/kernels/BUILD | +3 |
| tensorflow/contrib/quantization/kernels/quantization_utils.cc | +42 (new) |
| tensorflow/contrib/quantization/kernels/quantization_utils.h | +104 |
| tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc | +5, -76 |

4 files changed, 154 insertions, 76 deletions
```diff
diff --git a/tensorflow/contrib/quantization/kernels/BUILD b/tensorflow/contrib/quantization/kernels/BUILD
index 8c162a5ab5..6621c450fc 100644
--- a/tensorflow/contrib/quantization/kernels/BUILD
+++ b/tensorflow/contrib/quantization/kernels/BUILD
@@ -19,6 +19,7 @@ filegroup(
     name = "android_ops",
     srcs = [
         "dequantize_op.cc",
+        "quantization_utils.cc",
         "quantization_utils.h",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
@@ -50,6 +51,7 @@ tf_kernel_library(
     name = "quantized_ops",
     srcs = [
         "dequantize_op.cc",
+        "quantization_utils.cc",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
         "quantized_activation_ops.cc",
@@ -85,6 +87,7 @@ tf_custom_op_library(
     name = "_quantized_kernels.so",
     srcs = [
         "dequantize_op.cc",
+        "quantization_utils.cc",
         "quantization_utils.h",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.cc b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
new file mode 100644
index 0000000000..72651f96b0
--- /dev/null
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
@@ -0,0 +1,42 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
+
+namespace tensorflow {
+
+void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max,
+                                       float smaller_input_min,
+                                       float smaller_input_max,
+                                       float* output_min, float* output_max) {
+  // We need to have a good range to add our two arguments together in. This
+  // is surprisingly tricky, since it has to satisfy a few different needs:
+  //  - Must be symmetrical around zero, so that 0 + 0 = 0.
+  //  - Must hold the largest of the argument ranges.
+  //  - Should have enough range that the bits of the lowest and highest
+  //    arguments overlap if possible without the lower getting truncated.
+  //  - Should have some headroom so that there's no overflow.
+  //  - Needs to be signed.
+  // This leads us to use a scheme where we (assuming the inputs are eight bit
+  // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the
+  // accumulated results. This gives us all the properties we need.
+  *output_max =
+      std::max(input_max, std::max(-input_min, std::max(smaller_input_max,
+                                                        -smaller_input_min))) *
+      (1 << 17);
+  *output_min = -(*output_max);
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h
index 1bff8b194c..521187c5a2 100644
--- a/tensorflow/contrib/quantization/kernels/quantization_utils.h
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h
@@ -381,6 +381,110 @@ Tensor QuantizedTensorToFloat(const Tensor& input, float min, float max) {
   return result;
 }
 
+void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max,
+                                       float smaller_input_min,
+                                       float smaller_input_max,
+                                       float* output_min, float* output_max);
+
+// Add <input> and <smaller_input>. If <smaller_input> has fewer elements than
+// <input>, then it is broadcast onto <input>.
+template <typename T1, typename T2, typename T3>
+void QuantizedAddUsingEigen(const Eigen::ThreadPoolDevice& device,
+                            const Tensor& input, float input_min,
+                            float input_max, const Tensor& smaller_input,
+                            float smaller_input_min, float smaller_input_max,
+                            Tensor* output, float* output_min,
+                            float* output_max) {
+  const auto& input_flat = input.flat<T1>();
+  const auto& smaller_input_flat = smaller_input.flat<T2>();
+  auto output_flat = output->flat<T3>();
+
+  GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min,
+                                    smaller_input_max, output_min, output_max);
+  // To do addition properly, we need to compensate for a possibly unbalanced
+  // zero point in the total representation. The quantized value that
+  // represents the real number zero needs to be subtracted before addition to
+  // make sure that the identity of zero + zero = zero holds.
+  const T3 zero_in_total_space =
+      FloatToQuantized<T3>(0.0f, *output_min, *output_max);
+
+  const int64 input_element_count = input.NumElements();
+  const int64 smaller_input_element_count = smaller_input.NumElements();
+
+  QuantizedToFloatStruct<T1> smaller_input_q2f(smaller_input_min,
+                                               smaller_input_max);
+  QuantizedToFloatStruct<T2> input_q2f(input_min, input_max);
+  FloatToQuantizedStruct<T3> f2q(*output_min, *output_max);
+
+  auto smaller_input_float =
+      DEQUANTIZE_WITH_EIGEN(smaller_input_flat, smaller_input_q2f);
+  auto smaller_input_in_total_space =
+      QUANTIZE_WITH_EIGEN(smaller_input_float, f2q, T3);
+
+  auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
+  auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
+
+  Eigen::array<Eigen::DenseIndex, 1> bcast;
+  bcast[0] = input_element_count / smaller_input_element_count;
+  output_flat.device(device) =
+      input_in_total_space +
+      (smaller_input_in_total_space.broadcast(bcast) + zero_in_total_space);
+}
+
+// This is a reference implementation of the bias addition for quantized
+// buffers, designed to provide a clear specification for the result we
+// want. We'll want to specialize this for particular hardware, and
+// probably even fuse it with matrix multiplications in a lot of cases. It's
+// important to show the clamping behavior we want in particular.
+template <typename T1, typename T2, typename T3>
+void QuantizedAdd(const Eigen::ThreadPoolDevice& device, const Tensor& input,
+                  float input_min, float input_max, const Tensor& smaller_input,
+                  float smaller_input_min, float smaller_input_max,
+                  Tensor* output, float* output_min, float* output_max) {
+  const auto& input_flat = input.flat<T1>();
+  const auto& smaller_input_flat = smaller_input.flat<T2>();
+  auto output_flat = output->flat<T3>();
+
+  GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min,
+                                    smaller_input_max, output_min, output_max);
+  // To do addition properly, we need to compensate for a possibly unbalanced
+  // zero point in the total representation. The quantized value that
+  // represents the real number zero needs to be subtracted before addition to
+  // make sure that the identity of zero + zero = zero holds.
+  const T3 zero_in_total_space =
+      FloatToQuantized<T3>(0.0f, *output_min, *output_max);
+
+  const int64 input_element_count = input.NumElements();
+  const int64 smaller_input_element_count = smaller_input.NumElements();
+
+  float total_min = *output_min;
+  float total_max = *output_max;
+  const size_t how_many_iterations =
+      (input_element_count / smaller_input_element_count);
+  for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) {
+    const size_t offset = iteration * smaller_input_element_count;
+    for (int c = 0; c < smaller_input_element_count; ++c) {
+      const int index = (offset + c);
+      // The two numbers we're going to add can each be in very different
+      // ranges (e.g. the quantized value '127' may represent very different
+      // real numbers in both) so we need to convert them to a common range
+      // before we sum them.
+      const T1 input_value = input_flat(index);
+      const T3 input_in_total_space = RequantizeInNewRange<T1, T3>(
+          input_value, input_min, input_max, total_min, total_max);
+      const T2 smaller_input_value = smaller_input_flat(c);
+      const T3 smaller_input_in_total_space =
+          RequantizeInNewRange<T2, T3>(smaller_input_value, smaller_input_min,
+                                       smaller_input_max, total_min, total_max);
+      const T3 total_pre = input_in_total_space + smaller_input_in_total_space;
+      // As noted above, we need to compensate for the offset of the actual
+      // zero point in the space we're operating in.
+      const T3 total = total_pre + zero_in_total_space;
+      output_flat(index) = total;
+    }
+  }
+}
+
 }  // namespace tensorflow
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
diff --git a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
index 2531e1b443..c319eb97da 100644
--- a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
@@ -57,83 +57,12 @@ class QuantizedBiasAddOp : public OpKernel {
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
-    const auto& input_flat = input.flat<T1>();
-    const auto& bias_flat = bias.flat<T2>();
-    auto output_flat = output->flat<T3>();
-
-    // We need to have a good range to add our two arguments together in. This
-    // is surprisingly tricky, since it has to satisfy a few different needs:
-    //  - Must be symmetrical around zero, so that 0 + 0 = 0.
-    //  - Must hold the largest of the argument ranges.
-    //  - Should have enough range that the bits of the lowest and highest
-    //    arguments overlap if possible without the lower getting truncated.
-    //  - Should have some headroom so that there's no overflow.
-    //  - Needs to be signed.
-    // This leads us to use a scheme where we (assuming the inputs are eight bit
-    // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the
-    // accumulated results. This gives us all the properties we need.
-    const float total_max =
-        std::max(input_max,
-                 std::max(-input_min, std::max(bias_max, -bias_min))) *
-        (1 << 17);
-    const float total_min = -total_max;
-
-    // To do addition properly, we need to compensate for a possibly unbalanced
-    // zero point in the total representation. The quantized value that
-    // represents the real number zero needs to be subtracted before addition to
-    // make sure that the identity of zero + zero = zero holds.
-    const T3 zero_in_total_space =
-        FloatToQuantized<T3>(0.0f, total_min, total_max);
-
-    const int64 input_element_count = input.NumElements();
-    const int64 bias_element_count = bias.NumElements();
-
-    QuantizedToFloatStruct<T1> bias_q2f(bias_min, bias_max);
-    QuantizedToFloatStruct<T2> input_q2f(input_min, input_max);
-    FloatToQuantizedStruct<T3> f2q(total_min, total_max);
-
-    auto bias_float = DEQUANTIZE_WITH_EIGEN(bias_flat, bias_q2f);
-    auto bias_in_total_space = QUANTIZE_WITH_EIGEN(bias_float, f2q, T3);
-
-    auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f);
-    auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3);
-
-    Eigen::array<Eigen::DenseIndex, 1> bcast;
-    bcast[0] = input_element_count / bias_element_count;
-    output_flat.device(context->template eigen_device<CPUDevice>()) =
-        input_in_total_space +
-        (bias_in_total_space.broadcast(bcast) + zero_in_total_space);
-
-#if 0
-    const size_t how_many_iterations =
-        (input_element_count / bias_element_count);
-    // This is a reference implementation of the bias addition for quantized
-    // buffers, designed to provide a clear specification for the result we
-    // want. We'll want to specialize this for particular hardware, and
-    // probably even fuse it with matrix multiplications in a lot of cases. It's
-    // important to show the clamping behavior we want in particular.
-    for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) {
-      const size_t offset = iteration * bias_element_count;
-      for (int c = 0; c < bias_element_count; ++c) {
-        const int index = (offset + c);
-        // The two numbers we're going to add can each be in very different
-        // ranges (e.g. the quantized value '127' may represent very different
-        // real numbers in both) so we need to convert them to a common range
-        // before we sum them.
-        const T1 input_value = input_flat(index);
-        const T3 input_in_total_space = RequantizeInNewRange<T1, T3>(
-            input_value, input_min, input_max, total_min, total_max);
-        const T2 bias_value = bias_flat(c);
-        const T3 bias_in_total_space = RequantizeInNewRange<T2, T3>(
-            bias_value, bias_min, bias_max, total_min, total_max);
-        const T3 total_pre = input_in_total_space + bias_in_total_space;
-        // As noted above, we need to compensate for the offset of the actual
-        // zero point in the space we're operating in.
-        const T3 total = total_pre + zero_in_total_space;
-        output_flat(index) = total;
-      }
-    }
-#endif
+    float total_min;
+    float total_max;
+    QuantizedAddUsingEigen<T1, T2, T3>(
+        context->template eigen_device<CPUDevice>(), input, input_min,
+        input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
 
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
```
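A note on the range rule in `GetOutputMinAndMaxForQuantizedAdd`: the sketch below is a standalone restatement of that arithmetic (not the TensorFlow code; the function name and input ranges are made up for illustration). It shows how a pair of hypothetical argument ranges maps to the symmetric output range scaled by 2^17 that the comment describes.

```cpp
#include <algorithm>
#include <cstdio>

// Standalone restatement of the range rule: take the largest magnitude across
// both argument ranges, scale it by 2^17, and make the result symmetric.
// With eight-bit inputs and a 32-bit output, a requantized argument then
// lands in the bottom 32 - 17 = 15 bits, leaving headroom for the addition.
void OutputRangeForAdd(float input_min, float input_max,
                       float smaller_input_min, float smaller_input_max,
                       float* output_min, float* output_max) {
  const float largest_magnitude =
      std::max(std::max(input_max, -input_min),
               std::max(smaller_input_max, -smaller_input_min));
  *output_max = largest_magnitude * (1 << 17);
  *output_min = -(*output_max);
}

int main() {
  float out_min = 0.0f, out_max = 0.0f;
  // Hypothetical ranges: an activation in [-1, 1] and a bias in [-0.5, 0.5].
  OutputRangeForAdd(-1.0f, 1.0f, -0.5f, 0.5f, &out_min, &out_max);
  std::printf("[%g, %g]\n", out_min, out_max);  // [-131072, 131072]
  return 0;
}
```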
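The zero-point comment repeated in `QuantizedAddUsingEigen` and `QuantizedAdd` follows from the general affine-quantization identity: if q = q_zero + r / scale, then q_a + q_b carries one zero-point offset too many, so the correctly quantized sum is q_a + q_b - q_zero. A minimal sketch with a made-up scale and zero point (this is the generic identity, not the exact `FloatToQuantized` convention, whose offset for the symmetric range above is close to zero):

```cpp
#include <cstdio>

// Generic affine-quantization identity behind the zero-point compensation:
//   q = q_zero + r / scale  =>  q_a + q_b = 2 * q_zero + (r_a + r_b) / scale,
// so one zero-point offset must be removed for zero + zero = zero to hold.
int main() {
  const float scale = 0.25f;  // hypothetical step size (exactly representable)
  const int q_zero = 10;      // hypothetical zero point: r = 0 maps to q = 10
  const int q_a = q_zero + static_cast<int>(1.5f / scale);    // r_a = 1.5   -> 16
  const int q_b = q_zero + static_cast<int>(-0.25f / scale);  // r_b = -0.25 -> 9
  const int q_sum = q_a + q_b - q_zero;                       // 15
  const float r_sum = (q_sum - q_zero) * scale;               // (15-10)*0.25
  std::printf("q_sum = %d represents %g\n", q_sum, r_sum);    // 1.25 = 1.5 - 0.25
  return 0;
}
```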
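Both implementations broadcast the smaller input over the larger one with the same indexing: `bcast[0] = input_element_count / smaller_input_element_count` in the Eigen path, and the two nested loops in the reference path. A plain-float sketch of just that tiling, with hypothetical data and the quantization omitted:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical data: a 2x3 "tensor" stored flat, plus a 3-element bias.
  const std::vector<float> input = {1, 2, 3, 4, 5, 6};
  const std::vector<float> bias = {10, 20, 30};
  std::vector<float> output(input.size());

  // The smaller input is tiled input.size() / bias.size() times, mirroring
  // the nested loops of QuantizedAdd and the bcast array of
  // QuantizedAddUsingEigen.
  const std::size_t how_many_iterations = input.size() / bias.size();
  for (std::size_t iteration = 0; iteration < how_many_iterations;
       ++iteration) {
    const std::size_t offset = iteration * bias.size();
    for (std::size_t c = 0; c < bias.size(); ++c) {
      output[offset + c] = input[offset + c] + bias[c];
    }
  }
  for (float v : output) std::printf("%g ", v);  // 11 22 33 14 25 36
  std::printf("\n");
  return 0;
}
```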