Support narrow_range attr on FakeQuant nodes

PiperOrigin-RevId: 204339562
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-07-12 12:04:34 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-07-12 12:08:17 -0700
commit: 9ba6943a1dbbc415b72835517ad58808ca6a6a3d (patch)
tree: 8c40f84f8dcb6deb9799664a0c8068822c6897b9 /tensorflow/contrib/lite/toco/model.h
parent: c35bd2e9d3d9311bc7fb0f2463869faf1a8a7b50 (diff)
1 files changed, 35 insertions, 0 deletions
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 8660464fdb..d06a30b638 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -791,6 +791,7 @@ struct FakeQuantOperator : Operator {
   FakeQuantOperator() : Operator(OperatorType::kFakeQuant) {}
   std::unique_ptr<MinMax> minmax;
   int num_bits = 8;
+  bool narrow_range = false;
 };
 
 // Element-wise division operator.
@@ -1854,6 +1855,40 @@ struct Array {
   // If this is non-null, then these quantization parameters are to be used
   // to assign a meaning as real numbers to the elements of this array.
   std::unique_ptr<QuantizationParams> quantization_params;
+  // narrow_range is a detail of how toco handles FakeQuant operators with
+  // narrow_range, see
+  // https://www.tensorflow.org/api_docs/python/tf/fake_quant_with_min_max_vars
+  //
+  // For more context about what that is useful for, see the big comment in
+  // graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
+  //
+  // The narrow_range flag applies only to quantized arrays, and changes
+  // their quantization in the following way when it is set to 'true':
+  // 1. The computation of {zero_point, scale} from {min, max} needs to be
+  //    amended so that the real min value will get quantized to
+  //    (min_quantized_value + 1) instead of just (min_quantized_value).
+  //    E.g. for uint8 quantization, the real min value should get quantized to
+  //    the uint8 value 1, not 0.
+  // 2. Quantized values should get clamped to the interval
+  //    [min_quantized_value + 1, max_quantized_value]. Equivalently, the
+  //    min_quantized_value should get nudged to (min_quantized_value + 1).
+  // The reason why 1. does not imply 2. is that real values may not belong to
+  // the stated [min, max] interval. Concretely, weights recorded at the last
+  // learning step may not fall in the [min, max] interval recorded over
+  // previous learning steps, as the values evolve across learning steps.
+  //
+  // Rationale why this is directly a field on Array:
+  // - This can't be just a field on FakeQuantOperator, because
+  //   FakeQuantOperators are gone (DropFakeQuant) before we get to using that
+  //   information (Quantize). We need a place to store that bit in the interim.
+  // - This can't be in QuantizationParams because we need to record this
+  //   ahead of quantization, and QuantizationParams are only created during
+  //   quantization.
+  // - This could be in MinMax, but that would be an abuse of what MinMax is
+  //   about, and would break existing code that assumes that a MinMax is just
+  //   a min and a max. Unlike MinMax which is agnostic as to the quantized
+  //   data type, narrow_range refers to values in the quantized data type.
+  bool narrow_range = false;
 
  private:
   std::unique_ptr<Shape> array_shape;
author	A. Unique TensorFlower <gardener@tensorflow.org>	2018-07-12 12:04:34 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-07-12 12:08:17 -0700
commit	9ba6943a1dbbc415b72835517ad58808ca6a6a3d (patch)
tree	8c40f84f8dcb6deb9799664a0c8068822c6897b9 /tensorflow/contrib/lite/toco/model.h
parent	c35bd2e9d3d9311bc7fb0f2463869faf1a8a7b50 (diff)