about summary refs log tree commit diff homepage
path: root/tensorflow/contrib/quantization
diff options
context:
space:
mode:
author    A. Unique TensorFlower <gardener@tensorflow.org> 2016-07-12 18:57:22 -0800
committer TensorFlower Gardener <gardener@tensorflow.org> 2016-07-12 20:03:05 -0700
commit 092c06351f8f30d9e2e92a5ed30d9c0afc5b3a33 (patch)
tree   bb2de033da449b7f888696165ad62efd71b560a3 /tensorflow/contrib/quantization
parent 10211a6c8e168f54ac0096d6fc7bc6fd7346e6fe (diff)
Enable optimized requantization function.
Fix rezeroing of input range in optimized requantize function to account for signed input. Change some computations to be in scaled-up fixed point instead of after scaling back down. Make rounding delta always be positive. Change: 127272485
Diffstat (limited to 'tensorflow/contrib/quantization')
-rw-r--r--  tensorflow/contrib/quantization/kernels/quantization_utils.h            |  48
-rw-r--r--  tensorflow/contrib/quantization/kernels/quantization_utils_test.cc      | 124
-rw-r--r--  tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc |   8
3 files changed, 106 insertions, 74 deletions
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h
index e43b8d7f0d..1bff8b194c 100644
--- a/tensorflow/contrib/quantization/kernels/quantization_utils.h
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h
@@ -191,9 +191,10 @@ inline T2 RequantizeInNewRange(T1 input, float min_input, float max_input,
}
template <class T1, class T2>
-inline void RequantizeManyInNewRange(T1* input, size_t count, float min_input,
- float max_input, float min_output,
- float max_output, T2* output) {
+inline void RequantizeManyInNewRange(const T1* input, size_t count,
+ float min_input, float max_input,
+ float min_output, float max_output,
+ T2* output) {
for (size_t index = 0; index < count; ++index) {
const float input_float =
QuantizedToFloat<T1>(input[index], min_input, max_input);
@@ -206,7 +207,7 @@ inline void RequantizeManyInNewRange(T1* input, size_t count, float min_input,
// possible using only fixed-point math for the inner loop.
template <>
inline void RequantizeManyInNewRange<qint32, quint8>(
- qint32* input, size_t count, float min_input, float max_input,
+ const qint32* input, size_t count, float min_input, float max_input,
float min_output, float max_output, quint8* output) {
// Initially we calculate all the constants we need once, before we go into
// the inner loop. If this is updated, also update the Eigen version.
@@ -215,16 +216,17 @@ inline void RequantizeManyInNewRange<qint32, quint8>(
const float output_range = max_output - min_output;
const float recip_output_range =
output_range == 0.0 ? 0.0 : (255.0 / output_range);
- const int64 recip_output_range_fp =
- static_cast<int64>(recip_output_range * (1 << fp_shift));
+ const float input_rezero = (min_input + max_input) / 2.0;
const int64 range_scale_fp =
output_range == 0.0 ? 0.0
: static_cast<int64>(255.0 * (1 << fp_shift) *
input_range / output_range);
const int64 input_offset_fp =
- (min_input * recip_output_range_fp) + (range_scale_fp >> 1);
+ static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift));
const int64 output_offset_fp =
- output_range == 0.0 ? 0.0 : round((min_output * 255.0) / output_range);
+ output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) *
+ (min_output * 255.0) /
+ output_range);
const int64 rounding_delta = 1 << (fp_shift - 1);
// Inside this loop we just do minimal adds, multiplies, and shifts, in a way
@@ -235,11 +237,9 @@ inline void RequantizeManyInNewRange<qint32, quint8>(
const int64 input_value = static_cast<int64>(input[index]);
const int64 fp_value =
((input_value * range_scale_fp) >> 32) + input_offset_fp;
- const int64 round_intermediate =
- ((fp_value >= 0) ? (fp_value + rounding_delta)
- : (fp_value - rounding_delta)) >>
- fp_shift;
- int64 quantized_int64 = (round_intermediate - output_offset_fp);
+ const int64 offset_intermediate = fp_value - output_offset_fp;
+ const int64 round_intermediate = offset_intermediate + rounding_delta;
+ int64 quantized_int64 = round_intermediate >> fp_shift;
quantized_int64 = std::max(quantized_int64, 0LL);
quantized_int64 = std::min(quantized_int64, 255LL);
output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64));
@@ -269,15 +269,11 @@ inline void RequantizeManyInNewRangeUsingEigen(
output->flat<T2>().device(device) = input_requantized;
}
-#if 0
// See RequantizeManyInNewRange() for a non-eigen reference implementation.
//
// Because converting 32-bit accumulated results down to eight bit is a common
// case, we have a specialized code path to handle it as efficiently as
// possible using only fixed-point math for the inner loop.
-//
-// See #ifdefed out test in quantization_utils_test.cc
-// (RequantizeManyInNewRange32To8BitUsingEigen).
template <>
inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
const Eigen::ThreadPoolDevice& device, const Tensor& input, float min_input,
@@ -289,14 +285,15 @@ inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
const float output_range = max_output - min_output;
const float recip_output_range =
output_range == 0.0 ? 0.0 : (255.0 / output_range);
- const int64 recip_output_range_fp =
- static_cast<int64>(recip_output_range * (1 << fp_shift));
+ const float input_rezero = (min_input + max_input) / 2.0;
const int64 range_scale_fp =
static_cast<int64>(255.0 * (1 << fp_shift) * input_range / output_range);
const int64 input_offset_fp =
- (min_input * recip_output_range_fp) + (range_scale_fp >> 1);
+ static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift));
const int64 output_offset_fp =
- output_range == 0.0 ? 0.0 : round((min_output * 255.0) / output_range);
+ output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) *
+ (min_output * 255.0) /
+ output_range);
const int64 rounding_delta = 1 << (fp_shift - 1);
// Inside this eigen expression we just do minimal adds, multiplies, and
@@ -305,17 +302,14 @@ inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
auto input_array = input.flat<qint32>();
auto fp_value = ((input_array.template cast<int64>() * range_scale_fp)
.unaryExpr(int64_right_shift_op<32>())) +
- input_offset_fp;
- auto round_intermediate = (fp_value + rounding_delta * fp_value.sign())
- .unaryExpr(int64_right_shift_op<fp_shift>());
- auto input_requantized = (round_intermediate - output_offset_fp)
- .cwiseMax(0LL)
+ (input_offset_fp - output_offset_fp + rounding_delta);
+ auto intermediate = fp_value.unaryExpr(int64_right_shift_op<fp_shift>());
+ auto input_requantized = intermediate.cwiseMax(0LL)
.cwiseMin(255LL)
.template cast<int32>()
.template cast<quint8>();
output->flat<quint8>().device(device) = input_requantized;
}
-#endif
// REQUIRES: 'result->NumElements() == input.NumElements()'
template <class T>
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
index 3d4356a332..f4a4e8d962 100644
--- a/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
@@ -25,12 +25,56 @@ limitations under the License.
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
class QuantizationUtilsTest : public ::testing::Test {
protected:
+ void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device,
+ float input_min, float input_max, float output_min,
+ float output_max,
+ const std::vector<qint32>& values_quantized,
+ int tolerance = 1) {
+ const int values_count = values_quantized.size();
+ std::vector<quint8> expected_values;
+ for (int value_index = 0; value_index < values_count; ++value_index) {
+ expected_values.push_back(FloatToQuantized<quint8>(
+ QuantizedToFloat(values_quantized[value_index], input_min, input_max),
+ output_min, output_max));
+ }
+
+ Tensor i_tensor =
+ tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized));
+ Tensor o_tensor(DT_QUINT8, TensorShape{values_count});
+ auto output_values = o_tensor.flat<quint8>();
+
+ if (eigen_device == nullptr) {
+ auto input_array = i_tensor.flat<qint32>();
+ RequantizeManyInNewRange(input_array.data(), input_array.size(),
+ input_min, input_max, output_min, output_max,
+ output_values.data());
+ } else {
+ RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
+ *eigen_device, i_tensor, input_min, input_max, output_min, output_max,
+ &o_tensor);
+ }
+
+ const string tolerance_str = strings::StrCat("+-", tolerance);
+ for (size_t value_index = 0; value_index < values_count; ++value_index) {
+ int e = expected_values[value_index];
+ int v = output_values(value_index);
+ ASSERT_TRUE(std::abs(e - v) <= tolerance)
+ << "actual=" << v << ", expected=" << e << tolerance_str
+ << ", values_quantized[" << value_index
+ << "]=" << values_quantized[value_index]
+ << ", input_min=" << input_min << ", input_max=" << input_max
+ << ", output_min=" << output_min << ", output_max=" << output_max
+ << ", value_index=" << value_index;
+ }
+ }
+
// If eigen_device is NULL, then the reference implementation is tested.
void TestRequantizeManyInNewRange32To8Bit(
Eigen::ThreadPoolDevice* eigen_device) {
@@ -48,51 +92,47 @@ class QuantizationUtilsTest : public ::testing::Test {
{3.0f, 3.0f, 0.0f, 255.0f}, // input min == max
{0.0f, 255.0f, 5.0f, 5.0f}, // output min == max
};
- for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
- const float input_min = ranges[range_index][0];
- const float input_max = ranges[range_index][1];
- const float output_min = ranges[range_index][2];
- const float output_max = ranges[range_index][3];
+ for (int i = 0; i < ranges_count; ++i) {
+ const auto& r = ranges[i];
std::vector<qint32> values_quantized;
- std::vector<quint8> expected_values;
- for (size_t value_index = 0; value_index < values_count; ++value_index) {
- const float value_float = values[value_index];
- values_quantized.push_back(
- FloatToQuantized<qint32>(value_float, input_min, input_max));
- expected_values.push_back(FloatToQuantized<quint8>(
- QuantizedToFloat(values_quantized[value_index], input_min,
- input_max),
- output_min, output_max));
- }
-
- Tensor i_tensor =
- tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized));
- Tensor o_tensor(DT_QUINT8, TensorShape{values_count});
- auto output_values = o_tensor.flat<quint8>();
-
- if (eigen_device == nullptr) {
- auto input_array = i_tensor.flat<qint32>();
- RequantizeManyInNewRange(input_array.data(), input_array.size(),
- input_min, input_max, output_min, output_max,
- output_values.data());
- } else {
- RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
- *eigen_device, i_tensor, input_min, input_max, output_min,
- output_max, &o_tensor);
+ for (int value_index = 0; value_index < values_count; ++value_index) {
+ const float v = values[value_index];
+ values_quantized.push_back(FloatToQuantized<qint32>(v, r[0], r[1]));
}
+ TestRequantizeMany(eigen_device, r[0], r[1], r[2], r[3],
+ values_quantized);
+ }
- for (size_t value_index = 0; value_index < values_count; ++value_index) {
- // Here we convert the quantized input value to what we expect
- // to get in the output range.
- ASSERT_EQ(expected_values[value_index], output_values(value_index))
- << "values_quantized[" << value_index
- << "]=" << values_quantized[value_index] << ", values["
- << value_index << "]=" << values[value_index]
- << ", input_min=" << input_min << ", input_max=" << input_max
- << ", output_min=" << output_min << ", output_max=" << output_max
- << ", value_index=" << value_index;
- }
+ // Test with many different values in the input quantized range.
+ qint32 low = Eigen::NumTraits<qint32>::lowest();
+ qint32 high = Eigen::NumTraits<qint32>::highest();
+ std::vector<qint32> vals{low, high};
+ int num_steps = 14419;
+ qint32 step = static_cast<int32>((1L << 32) / num_steps);
+ qint32 v = low + static_cast<qint32>(1);
+ for (int i = 0; i < num_steps; ++i) {
+ vals.push_back(v);
+ v += step;
}
+ TestRequantizeMany(eigen_device, -1.0f, 1.0f, -1.0f, 1.0f, vals);
+ TestRequantizeMany(eigen_device, -255.0f, 255.0f, -255.0f, 255.0f, vals);
+ TestRequantizeMany(eigen_device, -1.0f, 1.0f, -12345678.0f, 12345678.0f,
+ vals);
+ TestRequantizeMany(eigen_device, -1.0f, 12345678.0f, -12345678.0f,
+ 12345678.0f, vals);
+
+ // Test when the input range is large and output range is small.
+ // Use all quantized values where the float is in the output range.
+ const float out_min = -29.1234;
+ const float out_max = 23.1234;
+ const float in_min = -1e6;
+ const float in_max = 1e6;
+
+ low = FloatToQuantized<qint32>(out_min, in_min, in_max);
+ high = FloatToQuantized<qint32>(out_max, in_min, in_max);
+ vals.clear();
+ for (int32 i = low; i <= high; ++i) vals.push_back(i);
+ TestRequantizeMany(eigen_device, in_min, in_max, out_min, out_max, vals);
}
template <typename InputType, typename OutputType>
@@ -408,14 +448,12 @@ TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8Bit) {
TestRequantizeManyInNewRange32To8Bit(nullptr /* eigen_device */);
}
-#if 0
TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitUsingEigen) {
thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
EigenThreadPoolWrapper wrapper(&threadpool);
Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */);
TestRequantizeManyInNewRange32To8Bit(&eigen_device);
}
-#endif
TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitEigenVsNonEigen) {
TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, quint8>();
diff --git a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
index 5a109d9f76..18dffd1dc6 100644
--- a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
@@ -71,10 +71,10 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel {
#if 0
// This is the reference, non-eigen implementation:
auto output_array = output->flat<T2>();
- RequantizeManyInNewRange(input_array.data(), input_array.size(),
- input_min_float, input_max_float, actual_min_float,
- actual_max_float, output_array.data());
-
+ RequantizeManyInNewRange<T1, T2>(input_array.data(), input_array.size(),
+ input_min_float, input_max_float,
+ actual_min_float, actual_max_float,
+ output_array.data());
#endif
if (input_array.size() > 0) {