about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author A. Unique TensorFlower <gardener@tensorflow.org> 2016-07-12 18:57:22 -0800
committer TensorFlower Gardener <gardener@tensorflow.org> 2016-07-12 20:03:05 -0700
commit 092c06351f8f30d9e2e92a5ed30d9c0afc5b3a33 (patch)
tree bb2de033da449b7f888696165ad62efd71b560a3
parent 10211a6c8e168f54ac0096d6fc7bc6fd7346e6fe (diff)
Enable optimized requantization function.
Fix rezeroing of input range in optimized requantize function to account for signed input. Change some computations to be in scaled-up fixed point instead of after scaling back down. Make rounding delta always be positive. Change: 127272485
-rw-r--r-- tensorflow/contrib/quantization/kernels/quantization_utils.h | 48
-rw-r--r-- tensorflow/contrib/quantization/kernels/quantization_utils_test.cc | 124
-rw-r--r-- tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc | 8
3 files changed, 106 insertions, 74 deletions
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h
index e43b8d7f0d..1bff8b194c 100644
--- a/tensorflow/contrib/quantization/kernels/quantization_utils.h
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h
@@ -191,9 +191,10 @@ inline T2 RequantizeInNewRange(T1 input, float min_input, float max_input,
}
template <class T1, class T2>
-inline void RequantizeManyInNewRange(T1* input, size_t count, float min_input,
- float max_input, float min_output,
- float max_output, T2* output) {
+inline void RequantizeManyInNewRange(const T1* input, size_t count,
+ float min_input, float max_input,
+ float min_output, float max_output,
+ T2* output) {
for (size_t index = 0; index < count; ++index) {
const float input_float =
QuantizedToFloat<T1>(input[index], min_input, max_input);
@@ -206,7 +207,7 @@ inline void RequantizeManyInNewRange(T1* input, size_t count, float min_input,
// possible using only fixed-point math for the inner loop.
template <>
inline void RequantizeManyInNewRange<qint32, quint8>(
- qint32* input, size_t count, float min_input, float max_input,
+ const qint32* input, size_t count, float min_input, float max_input,
float min_output, float max_output, quint8* output) {
// Initially we calculate all the constants we need once, before we go into
// the inner loop. If this is updated, also update the Eigen version.
@@ -215,16 +216,17 @@ inline void RequantizeManyInNewRange<qint32, quint8>(
const float output_range = max_output - min_output;
const float recip_output_range =
output_range == 0.0 ? 0.0 : (255.0 / output_range);
- const int64 recip_output_range_fp =
- static_cast<int64>(recip_output_range * (1 << fp_shift));
+ const float input_rezero = (min_input + max_input) / 2.0;
const int64 range_scale_fp =
output_range == 0.0 ? 0.0
: static_cast<int64>(255.0 * (1 << fp_shift) *
input_range / output_range);
const int64 input_offset_fp =
- (min_input * recip_output_range_fp) + (range_scale_fp >> 1);
+ static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift));
const int64 output_offset_fp =
- output_range == 0.0 ? 0.0 : round((min_output * 255.0) / output_range);
+ output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) *
+ (min_output * 255.0) /
+ output_range);
const int64 rounding_delta = 1 << (fp_shift - 1);
// Inside this loop we just do minimal adds, multiplies, and shifts, in a way
@@ -235,11 +237,9 @@ inline void RequantizeManyInNewRange<qint32, quint8>(
const int64 input_value = static_cast<int64>(input[index]);
const int64 fp_value =
((input_value * range_scale_fp) >> 32) + input_offset_fp;
- const int64 round_intermediate =
- ((fp_value >= 0) ? (fp_value + rounding_delta)
- : (fp_value - rounding_delta)) >>
- fp_shift;
- int64 quantized_int64 = (round_intermediate - output_offset_fp);
+ const int64 offset_intermediate = fp_value - output_offset_fp;
+ const int64 round_intermediate = offset_intermediate + rounding_delta;
+ int64 quantized_int64 = round_intermediate >> fp_shift;
quantized_int64 = std::max(quantized_int64, 0LL);
quantized_int64 = std::min(quantized_int64, 255LL);
output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64));
@@ -269,15 +269,11 @@ inline void RequantizeManyInNewRangeUsingEigen(
output->flat<T2>().device(device) = input_requantized;
}
-#if 0
// See RequantizeManyInNewRange() for a non-eigen reference implementation.
//
// Because converting 32-bit accumulated results down to eight bit is a common
// case, we have a specialized code path to handle it as efficiently as
// possible using only fixed-point math for the inner loop.
-//
-// See #ifdefed out test in quantization_utils_test.cc
-// (RequantizeManyInNewRange32To8BitUsingEigen).
template <>
inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
const Eigen::ThreadPoolDevice& device, const Tensor& input, float min_input,
@@ -289,14 +285,15 @@ inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
const float output_range = max_output - min_output;
const float recip_output_range =
output_range == 0.0 ? 0.0 : (255.0 / output_range);
- const int64 recip_output_range_fp =
- static_cast<int64>(recip_output_range * (1 << fp_shift));
+ const float input_rezero = (min_input + max_input) / 2.0;
const int64 range_scale_fp =
static_cast<int64>(255.0 * (1 << fp_shift) * input_range / output_range);
const int64 input_offset_fp =
- (min_input * recip_output_range_fp) + (range_scale_fp >> 1);
+ static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift));
const int64 output_offset_fp =
- output_range == 0.0 ? 0.0 : round((min_output * 255.0) / output_range);
+ output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) *
+ (min_output * 255.0) /
+ output_range);
const int64 rounding_delta = 1 << (fp_shift - 1);
// Inside this eigen expression we just do minimal adds, multiplies, and
@@ -305,17 +302,14 @@ inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
auto input_array = input.flat<qint32>();
auto fp_value = ((input_array.template cast<int64>() * range_scale_fp)
.unaryExpr(int64_right_shift_op<32>())) +
- input_offset_fp;
- auto round_intermediate = (fp_value + rounding_delta * fp_value.sign())
- .unaryExpr(int64_right_shift_op<fp_shift>());
- auto input_requantized = (round_intermediate - output_offset_fp)
- .cwiseMax(0LL)
+ (input_offset_fp - output_offset_fp + rounding_delta);
+ auto intermediate = fp_value.unaryExpr(int64_right_shift_op<fp_shift>());
+ auto input_requantized = intermediate.cwiseMax(0LL)
.cwiseMin(255LL)
.template cast<int32>()
.template cast<quint8>();
output->flat<quint8>().device(device) = input_requantized;
}
-#endif
// REQUIRES: 'result->NumElements() == input.NumElements()'
template <class T>
diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
index 3d4356a332..f4a4e8d962 100644
--- a/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
@@ -25,12 +25,56 @@ limitations under the License.
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
class QuantizationUtilsTest : public ::testing::Test {
protected:
+ void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device,
+ float input_min, float input_max, float output_min,
+ float output_max,
+ const std::vector<qint32>& values_quantized,
+ int tolerance = 1) {
+ const int values_count = values_quantized.size();
+ std::vector<quint8> expected_values;
+ for (int value_index = 0; value_index < values_count; ++value_index) {
+ expected_values.push_back(FloatToQuantized<quint8>(
+ QuantizedToFloat(values_quantized[value_index], input_min, input_max),
+ output_min, output_max));
+ }
+
+ Tensor i_tensor =
+ tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized));
+ Tensor o_tensor(DT_QUINT8, TensorShape{values_count});
+ auto output_values = o_tensor.flat<quint8>();
+
+ if (eigen_device == nullptr) {
+ auto input_array = i_tensor.flat<qint32>();
+ RequantizeManyInNewRange(input_array.data(), input_array.size(),
+ input_min, input_max, output_min, output_max,
+ output_values.data());
+ } else {
+ RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
+ *eigen_device, i_tensor, input_min, input_max, output_min, output_max,
+ &o_tensor);
+ }
+
+ const string tolerance_str = strings::StrCat("+-", tolerance);
+ for (size_t value_index = 0; value_index < values_count; ++value_index) {
+ int e = expected_values[value_index];
+ int v = output_values(value_index);
+ ASSERT_TRUE(std::abs(e - v) <= tolerance)
+ << "actual=" << v << ", expected=" << e << tolerance_str
+ << ", values_quantized[" << value_index
+ << "]=" << values_quantized[value_index]
+ << ", input_min=" << input_min << ", input_max=" << input_max
+ << ", output_min=" << output_min << ", output_max=" << output_max
+ << ", value_index=" << value_index;
+ }
+ }
+
// If eigen_device is NULL, then the reference implementation is tested.
void TestRequantizeManyInNewRange32To8Bit(
Eigen::ThreadPoolDevice* eigen_device) {
@@ -48,51 +92,47 @@ class QuantizationUtilsTest : public ::testing::Test {
{3.0f, 3.0f, 0.0f, 255.0f}, // input min == max
{0.0f, 255.0f, 5.0f, 5.0f}, // output min == max
};
- for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
- const float input_min = ranges[range_index][0];
- const float input_max = ranges[range_index][1];
- const float output_min = ranges[range_index][2];
- const float output_max = ranges[range_index][3];
+ for (int i = 0; i < ranges_count; ++i) {
+ const auto& r = ranges[i];
std::vector<qint32> values_quantized;
- std::vector<quint8> expected_values;
- for (size_t value_index = 0; value_index < values_count; ++value_index) {
- const float value_float = values[value_index];
- values_quantized.push_back(
- FloatToQuantized<qint32>(value_float, input_min, input_max));
- expected_values.push_back(FloatToQuantized<quint8>(
- QuantizedToFloat(values_quantized[value_index], input_min,
- input_max),
- output_min, output_max));
- }
-
- Tensor i_tensor =
- tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized));
- Tensor o_tensor(DT_QUINT8, TensorShape{values_count});
- auto output_values = o_tensor.flat<quint8>();
-
- if (eigen_device == nullptr) {
- auto input_array = i_tensor.flat<qint32>();
- RequantizeManyInNewRange(input_array.data(), input_array.size(),
- input_min, input_max, output_min, output_max,
- output_values.data());
- } else {
- RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
- *eigen_device, i_tensor, input_min, input_max, output_min,
- output_max, &o_tensor);
+ for (int value_index = 0; value_index < values_count; ++value_index) {
+ const float v = values[value_index];
+ values_quantized.push_back(FloatToQuantized<qint32>(v, r[0], r[1]));
}
+ TestRequantizeMany(eigen_device, r[0], r[1], r[2], r[3],
+ values_quantized);
+ }
- for (size_t value_index = 0; value_index < values_count; ++value_index) {
- // Here we convert the quantized input value to what we expect
- // to get in the output range.
- ASSERT_EQ(expected_values[value_index], output_values(value_index))
- << "values_quantized[" << value_index
- << "]=" << values_quantized[value_index] << ", values["
- << value_index << "]=" << values[value_index]
- << ", input_min=" << input_min << ", input_max=" << input_max
- << ", output_min=" << output_min << ", output_max=" << output_max
- << ", value_index=" << value_index;
- }
+ // Test with many different values in the input quantized range.
+ qint32 low = Eigen::NumTraits<qint32>::lowest();
+ qint32 high = Eigen::NumTraits<qint32>::highest();
+ std::vector<qint32> vals{low, high};
+ int num_steps = 14419;
+ qint32 step = static_cast<int32>((1L << 32) / num_steps);
+ qint32 v = low + static_cast<qint32>(1);
+ for (int i = 0; i < num_steps; ++i) {
+ vals.push_back(v);
+ v += step;
}
+ TestRequantizeMany(eigen_device, -1.0f, 1.0f, -1.0f, 1.0f, vals);
+ TestRequantizeMany(eigen_device, -255.0f, 255.0f, -255.0f, 255.0f, vals);
+ TestRequantizeMany(eigen_device, -1.0f, 1.0f, -12345678.0f, 12345678.0f,
+ vals);
+ TestRequantizeMany(eigen_device, -1.0f, 12345678.0f, -12345678.0f,
+ 12345678.0f, vals);
+
+ // Test when the input range is large and output range is small.
+ // Use all quantized values where the float is in the output range.
+ const float out_min = -29.1234;
+ const float out_max = 23.1234;
+ const float in_min = -1e6;
+ const float in_max = 1e6;
+
+ low = FloatToQuantized<qint32>(out_min, in_min, in_max);
+ high = FloatToQuantized<qint32>(out_max, in_min, in_max);
+ vals.clear();
+ for (int32 i = low; i <= high; ++i) vals.push_back(i);
+ TestRequantizeMany(eigen_device, in_min, in_max, out_min, out_max, vals);
}
template <typename InputType, typename OutputType>
@@ -408,14 +448,12 @@ TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8Bit) {
TestRequantizeManyInNewRange32To8Bit(nullptr /* eigen_device */);
}
-#if 0
TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitUsingEigen) {
thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
EigenThreadPoolWrapper wrapper(&threadpool);
Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */);
TestRequantizeManyInNewRange32To8Bit(&eigen_device);
}
-#endif
TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitEigenVsNonEigen) {
TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, quint8>();
diff --git a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
index 5a109d9f76..18dffd1dc6 100644
--- a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
@@ -71,10 +71,10 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel {
#if 0
// This is the reference, non-eigen implementation:
auto output_array = output->flat<T2>();
- RequantizeManyInNewRange(input_array.data(), input_array.size(),
- input_min_float, input_max_float, actual_min_float,
- actual_max_float, output_array.data());
-
+ RequantizeManyInNewRange<T1, T2>(input_array.data(), input_array.size(),
+ input_min_float, input_max_float,
+ actual_min_float, actual_max_float,
+ output_array.data());
#endif
if (input_array.size() > 0) {