aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/lite/toco/model.h
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-07-12 12:04:34 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-07-12 12:08:17 -0700
commit 9ba6943a1dbbc415b72835517ad58808ca6a6a3d (patch)
tree 8c40f84f8dcb6deb9799664a0c8068822c6897b9 /tensorflow/contrib/lite/toco/model.h
parent c35bd2e9d3d9311bc7fb0f2463869faf1a8a7b50 (diff)
Support narrow_range attr on FakeQuant nodes
PiperOrigin-RevId: 204339562
Diffstat (limited to 'tensorflow/contrib/lite/toco/model.h')
-rw-r--r-- tensorflow/contrib/lite/toco/model.h 35
1 files changed, 35 insertions, 0 deletions
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 8660464fdb..d06a30b638 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -791,6 +791,7 @@ struct FakeQuantOperator : Operator {
FakeQuantOperator() : Operator(OperatorType::kFakeQuant) {}
std::unique_ptr<MinMax> minmax;
int num_bits = 8;
+ bool narrow_range = false;
};
// Element-wise division operator.
@@ -1854,6 +1855,40 @@ struct Array {
// If this is non-null, then these quantization parameters are to be used
// to assign a meaning as real numbers to the elements of this array.
std::unique_ptr<QuantizationParams> quantization_params;
+ // narrow_range is how toco keeps track, on arrays, of the narrow_range
+ // attribute of FakeQuant operators; see
+ // https://www.tensorflow.org/api_docs/python/tf/fake_quant_with_min_max_vars
+ //
+ // For more context about what that is useful for, see the big comment in
+ // graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
+ //
+ // The narrow_range flag applies only to quantized arrays, and changes
+ // their quantization in the following way when it is set to 'true':
+ // 1. The computation of {zero_point, scale} from {min, max} needs to be
+ // amended so that the real min value will get quantized to
+ // (min_quantized_value + 1) instead of just (min_quantized_value).
+ // E.g. for uint8 quantization, the real min value should get quantized to
+ // the uint8 value 1, not 0.
+ // 2. Quantized values should get clamped to the interval
+ // [min_quantized_value + 1, max_quantized_value]. Equivalently, the
+ // min_quantized_value should get nudged to (min_quantized_value + 1).
+ // The reason why 1. does not imply 2. is that real values may not belong to
+ // the stated [min, max] interval. Concretely, weights recorded at the last
+ // learning step may not fall in the [min, max] interval recorded over
+ // previous learning steps, as the values evolve across learning steps.
+ //
+ // Rationale for making this a field directly on Array:
+ // - This can't be just a field on FakeQuantOperator, because
+ // FakeQuantOperators are gone (DropFakeQuant) before we get to using that
+ // information (Quantize). We need a place to store that bit in the interim.
+ // - This can't be in QuantizationParams because we need to record this
+ // ahead of quantization, and QuantizationParams are only created during
+ // quantization.
+ // - This could be in MinMax, but that would be an abuse of what MinMax is
+ // about, and would break existing code that assumes that a MinMax is just
+ // a min and a max. Unlike MinMax which is agnostic as to the quantized
+ // data type, narrow_range refers to values in the quantized data type.
+ bool narrow_range = false;
private:
std::unique_ptr<Shape> array_shape;