diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-07-12 12:04:34 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-07-12 12:08:17 -0700 |
commit | 9ba6943a1dbbc415b72835517ad58808ca6a6a3d (patch) | |
tree | 8c40f84f8dcb6deb9799664a0c8068822c6897b9 /tensorflow/contrib/lite/toco/model.h | |
parent | c35bd2e9d3d9311bc7fb0f2463869faf1a8a7b50 (diff) |
Support narrow_range attr on FakeQuant nodes
PiperOrigin-RevId: 204339562
Diffstat (limited to 'tensorflow/contrib/lite/toco/model.h')
-rw-r--r-- | tensorflow/contrib/lite/toco/model.h | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 8660464fdb..d06a30b638 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -791,6 +791,7 @@ struct FakeQuantOperator : Operator { FakeQuantOperator() : Operator(OperatorType::kFakeQuant) {} std::unique_ptr<MinMax> minmax; int num_bits = 8; + bool narrow_range = false; }; // Element-wise division operator. @@ -1854,6 +1855,40 @@ struct Array { // If this is non-null, then these quantization parameters are to be used // to assign a meaning as real numbers to the elements of this array. std::unique_ptr<QuantizationParams> quantization_params; + // narrow_range is a detail of how toco handles FakeQuant operators with + // narrow_range, see + // https://www.tensorflow.org/api_docs/python/tf/fake_quant_with_min_max_vars + // + // For more context about what that is useful for, see the big comment in + // graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc + // + // The narrow_range flag applies only to quantized arrays, and changes + // their quantization in the following way when it is set to 'true': + // 1. The computation of {zero_point, scale} from {min, max} needs to be + // amended so that the real min value will get quantized to + // (min_quantized_value + 1) instead of just (min_quantized_value). + // E.g. for uint8 quantization, the real min value should get quantized to + // the uint8 value 1, not 0. + // 2. Quantized values should get clamped to the interval + // [min_quantized_value + 1, max_value]. Equivalently, the + // min_quantized_value should get nudged to (min_quantized_value + 1). + // The reason why 1. does not imply 2. is that real values may not belong to + // the stated [min, max] interval. Concretely, weights recorded at the last + // learning step may not fall in the [min, max] interval recorded over + // previous learning steps, as the values evolve across learning steps. + // + // Rationale why this is directly a field on Array: + // - This can't be just a field on FakeQuantOperator, because + // FakeQuantOperators are gone (DropFakeQuant) before we get to using that + // information (Quantize). We need a place to store that bit in the interim. + // - This can't be in QuantizationParams because we need to record this + // ahead of quantization, and QuantizationParams are only created during + // quantization. + // - This could be in MinMax, but that would be an abuse of what MinMax is + // about, and would break existing code that assumes that a MinMax is just + // a min and a max. Unlike MinMax which is agnostic as to the quantized + // data type, narrow_range refers to values in the quantized data type. + bool narrow_range = false; private: std::unique_ptr<Shape> array_shape; |