diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2016-07-12 18:57:22 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-07-12 20:03:05 -0700 |
commit | 092c06351f8f30d9e2e92a5ed30d9c0afc5b3a33 (patch) | |
tree | bb2de033da449b7f888696165ad62efd71b560a3 /tensorflow/contrib/quantization | |
parent | 10211a6c8e168f54ac0096d6fc7bc6fd7346e6fe (diff) |
Enable optimized requantization function.
Fix the rezeroing of the input range in the optimized requantize function to
account for signed input.
Perform some computations in scaled-up fixed point instead of after scaling back down.
Make the rounding delta always positive.
Change: 127272485
Diffstat (limited to 'tensorflow/contrib/quantization')
3 files changed, 106 insertions, 74 deletions
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h index e43b8d7f0d..1bff8b194c 100644 --- a/tensorflow/contrib/quantization/kernels/quantization_utils.h +++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h @@ -191,9 +191,10 @@ inline T2 RequantizeInNewRange(T1 input, float min_input, float max_input, } template <class T1, class T2> -inline void RequantizeManyInNewRange(T1* input, size_t count, float min_input, - float max_input, float min_output, - float max_output, T2* output) { +inline void RequantizeManyInNewRange(const T1* input, size_t count, + float min_input, float max_input, + float min_output, float max_output, + T2* output) { for (size_t index = 0; index < count; ++index) { const float input_float = QuantizedToFloat<T1>(input[index], min_input, max_input); @@ -206,7 +207,7 @@ inline void RequantizeManyInNewRange(T1* input, size_t count, float min_input, // possible using only fixed-point math for the inner loop. template <> inline void RequantizeManyInNewRange<qint32, quint8>( - qint32* input, size_t count, float min_input, float max_input, + const qint32* input, size_t count, float min_input, float max_input, float min_output, float max_output, quint8* output) { // Initially we calculate all the constants we need once, before we go into // the inner loop. If this is updated, also update the Eigen version. @@ -215,16 +216,17 @@ inline void RequantizeManyInNewRange<qint32, quint8>( const float output_range = max_output - min_output; const float recip_output_range = output_range == 0.0 ? 0.0 : (255.0 / output_range); - const int64 recip_output_range_fp = - static_cast<int64>(recip_output_range * (1 << fp_shift)); + const float input_rezero = (min_input + max_input) / 2.0; const int64 range_scale_fp = output_range == 0.0 ? 
0.0 : static_cast<int64>(255.0 * (1 << fp_shift) * input_range / output_range); const int64 input_offset_fp = - (min_input * recip_output_range_fp) + (range_scale_fp >> 1); + static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift)); const int64 output_offset_fp = - output_range == 0.0 ? 0.0 : round((min_output * 255.0) / output_range); + output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) * + (min_output * 255.0) / + output_range); const int64 rounding_delta = 1 << (fp_shift - 1); // Inside this loop we just do minimal adds, multiplies, and shifts, in a way @@ -235,11 +237,9 @@ inline void RequantizeManyInNewRange<qint32, quint8>( const int64 input_value = static_cast<int64>(input[index]); const int64 fp_value = ((input_value * range_scale_fp) >> 32) + input_offset_fp; - const int64 round_intermediate = - ((fp_value >= 0) ? (fp_value + rounding_delta) - : (fp_value - rounding_delta)) >> - fp_shift; - int64 quantized_int64 = (round_intermediate - output_offset_fp); + const int64 offset_intermediate = fp_value - output_offset_fp; + const int64 round_intermediate = offset_intermediate + rounding_delta; + int64 quantized_int64 = round_intermediate >> fp_shift; quantized_int64 = std::max(quantized_int64, 0LL); quantized_int64 = std::min(quantized_int64, 255LL); output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64)); @@ -269,15 +269,11 @@ inline void RequantizeManyInNewRangeUsingEigen( output->flat<T2>().device(device) = input_requantized; } -#if 0 // See RequantizeManyInNewRange() for a non-eigen reference implementation. // // Because converting 32-bit accumulated results down to eight bit is a common // case, we have a specialized code path to handle it as efficiently as // possible using only fixed-point math for the inner loop. -// -// See #ifdefed out test in quantization_utils_test.cc -// (RequantizeManyInNewRange32To8BitUsingEigen). 
template <> inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>( const Eigen::ThreadPoolDevice& device, const Tensor& input, float min_input, @@ -289,14 +285,15 @@ inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>( const float output_range = max_output - min_output; const float recip_output_range = output_range == 0.0 ? 0.0 : (255.0 / output_range); - const int64 recip_output_range_fp = - static_cast<int64>(recip_output_range * (1 << fp_shift)); + const float input_rezero = (min_input + max_input) / 2.0; const int64 range_scale_fp = static_cast<int64>(255.0 * (1 << fp_shift) * input_range / output_range); const int64 input_offset_fp = - (min_input * recip_output_range_fp) + (range_scale_fp >> 1); + static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift)); const int64 output_offset_fp = - output_range == 0.0 ? 0.0 : round((min_output * 255.0) / output_range); + output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) * + (min_output * 255.0) / + output_range); const int64 rounding_delta = 1 << (fp_shift - 1); // Inside this eigen expression we just do minimal adds, multiplies, and @@ -305,17 +302,14 @@ inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>( auto input_array = input.flat<qint32>(); auto fp_value = ((input_array.template cast<int64>() * range_scale_fp) .unaryExpr(int64_right_shift_op<32>())) + - input_offset_fp; - auto round_intermediate = (fp_value + rounding_delta * fp_value.sign()) - .unaryExpr(int64_right_shift_op<fp_shift>()); - auto input_requantized = (round_intermediate - output_offset_fp) - .cwiseMax(0LL) + (input_offset_fp - output_offset_fp + rounding_delta); + auto intermediate = fp_value.unaryExpr(int64_right_shift_op<fp_shift>()); + auto input_requantized = intermediate.cwiseMax(0LL) .cwiseMin(255LL) .template cast<int32>() .template cast<quint8>(); output->flat<quint8>().device(device) = input_requantized; } -#endif // REQUIRES: 'result->NumElements() == input.NumElements()' template 
<class T> diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc index 3d4356a332..f4a4e8d962 100644 --- a/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc +++ b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc @@ -25,12 +25,56 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { class QuantizationUtilsTest : public ::testing::Test { protected: + void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device, + float input_min, float input_max, float output_min, + float output_max, + const std::vector<qint32>& values_quantized, + int tolerance = 1) { + const int values_count = values_quantized.size(); + std::vector<quint8> expected_values; + for (int value_index = 0; value_index < values_count; ++value_index) { + expected_values.push_back(FloatToQuantized<quint8>( + QuantizedToFloat(values_quantized[value_index], input_min, input_max), + output_min, output_max)); + } + + Tensor i_tensor = + tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized)); + Tensor o_tensor(DT_QUINT8, TensorShape{values_count}); + auto output_values = o_tensor.flat<quint8>(); + + if (eigen_device == nullptr) { + auto input_array = i_tensor.flat<qint32>(); + RequantizeManyInNewRange(input_array.data(), input_array.size(), + input_min, input_max, output_min, output_max, + output_values.data()); + } else { + RequantizeManyInNewRangeUsingEigen<qint32, quint8>( + *eigen_device, i_tensor, input_min, input_max, output_min, output_max, + &o_tensor); + } + + const string tolerance_str = strings::StrCat("+-", tolerance); + for (size_t value_index = 0; value_index < values_count; ++value_index) { + int e = 
expected_values[value_index]; + int v = output_values(value_index); + ASSERT_TRUE(std::abs(e - v) <= tolerance) + << "actual=" << v << ", expected=" << e << tolerance_str + << ", values_quantized[" << value_index + << "]=" << values_quantized[value_index] + << ", input_min=" << input_min << ", input_max=" << input_max + << ", output_min=" << output_min << ", output_max=" << output_max + << ", value_index=" << value_index; + } + } + // If eigen_device is NULL, then the reference implementation is tested. void TestRequantizeManyInNewRange32To8Bit( Eigen::ThreadPoolDevice* eigen_device) { @@ -48,51 +92,47 @@ class QuantizationUtilsTest : public ::testing::Test { {3.0f, 3.0f, 0.0f, 255.0f}, // input min == max {0.0f, 255.0f, 5.0f, 5.0f}, // output min == max }; - for (size_t range_index = 0; range_index < ranges_count; ++range_index) { - const float input_min = ranges[range_index][0]; - const float input_max = ranges[range_index][1]; - const float output_min = ranges[range_index][2]; - const float output_max = ranges[range_index][3]; + for (int i = 0; i < ranges_count; ++i) { + const auto& r = ranges[i]; std::vector<qint32> values_quantized; - std::vector<quint8> expected_values; - for (size_t value_index = 0; value_index < values_count; ++value_index) { - const float value_float = values[value_index]; - values_quantized.push_back( - FloatToQuantized<qint32>(value_float, input_min, input_max)); - expected_values.push_back(FloatToQuantized<quint8>( - QuantizedToFloat(values_quantized[value_index], input_min, - input_max), - output_min, output_max)); - } - - Tensor i_tensor = - tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized)); - Tensor o_tensor(DT_QUINT8, TensorShape{values_count}); - auto output_values = o_tensor.flat<quint8>(); - - if (eigen_device == nullptr) { - auto input_array = i_tensor.flat<qint32>(); - RequantizeManyInNewRange(input_array.data(), input_array.size(), - input_min, input_max, output_min, output_max, - output_values.data()); - } 
else { - RequantizeManyInNewRangeUsingEigen<qint32, quint8>( - *eigen_device, i_tensor, input_min, input_max, output_min, - output_max, &o_tensor); + for (int value_index = 0; value_index < values_count; ++value_index) { + const float v = values[value_index]; + values_quantized.push_back(FloatToQuantized<qint32>(v, r[0], r[1])); } + TestRequantizeMany(eigen_device, r[0], r[1], r[2], r[3], + values_quantized); + } - for (size_t value_index = 0; value_index < values_count; ++value_index) { - // Here we convert the quantized input value to what we expect - // to get in the output range. - ASSERT_EQ(expected_values[value_index], output_values(value_index)) - << "values_quantized[" << value_index - << "]=" << values_quantized[value_index] << ", values[" - << value_index << "]=" << values[value_index] - << ", input_min=" << input_min << ", input_max=" << input_max - << ", output_min=" << output_min << ", output_max=" << output_max - << ", value_index=" << value_index; - } + // Test with many different values in the input quantized range. + qint32 low = Eigen::NumTraits<qint32>::lowest(); + qint32 high = Eigen::NumTraits<qint32>::highest(); + std::vector<qint32> vals{low, high}; + int num_steps = 14419; + qint32 step = static_cast<int32>((1L << 32) / num_steps); + qint32 v = low + static_cast<qint32>(1); + for (int i = 0; i < num_steps; ++i) { + vals.push_back(v); + v += step; } + TestRequantizeMany(eigen_device, -1.0f, 1.0f, -1.0f, 1.0f, vals); + TestRequantizeMany(eigen_device, -255.0f, 255.0f, -255.0f, 255.0f, vals); + TestRequantizeMany(eigen_device, -1.0f, 1.0f, -12345678.0f, 12345678.0f, + vals); + TestRequantizeMany(eigen_device, -1.0f, 12345678.0f, -12345678.0f, + 12345678.0f, vals); + + // Test when the input range is large and output range is small. + // Use all quantized values where the float is in the output range. 
+ const float out_min = -29.1234; + const float out_max = 23.1234; + const float in_min = -1e6; + const float in_max = 1e6; + + low = FloatToQuantized<qint32>(out_min, in_min, in_max); + high = FloatToQuantized<qint32>(out_max, in_min, in_max); + vals.clear(); + for (int32 i = low; i <= high; ++i) vals.push_back(i); + TestRequantizeMany(eigen_device, in_min, in_max, out_min, out_max, vals); } template <typename InputType, typename OutputType> @@ -408,14 +448,12 @@ TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8Bit) { TestRequantizeManyInNewRange32To8Bit(nullptr /* eigen_device */); } -#if 0 TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitUsingEigen) { thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); EigenThreadPoolWrapper wrapper(&threadpool); Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); TestRequantizeManyInNewRange32To8Bit(&eigen_device); } -#endif TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitEigenVsNonEigen) { TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, quint8>(); diff --git a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc index 5a109d9f76..18dffd1dc6 100644 --- a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc +++ b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc @@ -71,10 +71,10 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel { #if 0 // This is the reference, non-eigen implementation: auto output_array = output->flat<T2>(); - RequantizeManyInNewRange(input_array.data(), input_array.size(), - input_min_float, input_max_float, actual_min_float, - actual_max_float, output_array.data()); - + RequantizeManyInNewRange<T1, T2>(input_array.data(), input_array.size(), + input_min_float, input_max_float, + actual_min_float, actual_max_float, + output_array.data()); #endif if (input_array.size() > 0) { |