author | Suharsh Sivakumar <suharshs@google.com> | 2018-03-19 20:42:00 -0700
---|---|---
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-03-19 20:45:28 -0700
commit | 1f4ee9d3d705a9c64af69e51e9fb5c738e145802 (patch) |
tree | 7a0057ae9d4dee5c517ce4eaa2a8733fdd5a6304 /tensorflow/contrib/quantize |
parent | 88334807a5beb8b61a967d21e534ed238e7916c0 (diff) |
Quantize bypasses after activations.
PiperOrigin-RevId: 189686219
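The change targets residual-style graphs where an activation's output feeds a bypass `Add`. Below is a minimal sketch of such a graph; only `quantize.Quantize` and its signature come from this diff, while the shapes, layer choices, and scope names are illustrative assumptions:

```python
# Sketch of a post-activation bypass, the pattern this commit targets.
# Shapes, layer choices, and scope names are illustrative assumptions.
import tensorflow as tf
from tensorflow.contrib.quantize.python import quantize

graph = tf.Graph()
with graph.as_default():
  inputs = tf.zeros((1, 32, 32, 3))
  shortcut = tf.zeros((1, 16, 16, 32))
  conv = tf.contrib.layers.conv2d(
      inputs, 32, [5, 5], stride=2, padding='SAME',
      activation_fn=tf.nn.relu, scope='layer1')
  # The bypass sits *after* the activation. Before this change the output
  # of this Add was not quantized; the new post_activation_bypass match
  # inserts a FakeQuant op after it, so the Add ends up bracketed by the
  # activation's existing FakeQuant and the new one.
  out = tf.add(conv, shortcut, name='layer1/residual_add')

  quantize.Quantize(graph, is_training=True, weight_bits=8,
                    activation_bits=8)
```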
Diffstat (limited to 'tensorflow/contrib/quantize')
-rw-r--r-- | tensorflow/contrib/quantize/python/quantize.py | 84
-rw-r--r-- | tensorflow/contrib/quantize/python/quantize_test.py | 29
2 files changed, 108 insertions, 5 deletions
```diff
diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 6cc097b20e..9780e6dbcc 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -123,10 +123,47 @@ def Quantize(graph,
         vars_collection=vars_collection,
         bits=activation_bits)
 
+    if layer_match.post_activation_bypass_op is not None:
+      _InsertQuantOp(
+          add_context,
+          'post_activation_bypass_quant',
+          layer_match.post_activation_bypass_op,
+          input_to_ops_map.ConsumerOperations(
+              layer_match.post_activation_bypass_op),
+          is_training,
+          moving_avg=True,
+          ema_decay=ema_decay,
+          quant_delay=quant_delay,
+          vars_collection=vars_collection,
+          bits=activation_bits)
+
 
 def _FindLayersToQuantize(graph):
   """Matches layers in graph to quantize.
 
+  The following patterns get matched. Nodes surrounded by [] will be
+  optionally matched:
+
+          weight|folded_weight
+                /
+         conv|fc
+            |
+    [post_conv_correction]
+            |
+     biasadd|folded_bias
+            |
+         [bypass]
+            |
+        activation
+            |
+   [post_activation_bypass]
+
+  Match replacements:
+    If weight|folded_weight is found, FakeQuant is added afterwards.
+    If bypass is found, FakeQuant is added before and after.
+    If activation is found, FakeQuant is added afterwards.
+    If post_activation_bypass is found, FakeQuant is added afterwards.
+
   Args:
     graph: Graph to perform match on.
 
@@ -179,7 +216,7 @@ def _FindLayersToQuantize(graph):
           [bias_add_pattern, folded_bias_add_pattern])
   ])
 
-  # The input to the activation can come from bias add, fold bias add or the
+  # The input to the activation can come from bias add, fold bias add, the
   # bypasses.
   activation_pattern = graph_matcher.OpTypePattern(
       '|'.join(_ACTIVATION_TYPES),
@@ -190,7 +227,16 @@ def _FindLayersToQuantize(graph):
           ])
       ])
 
-  layer_matcher = graph_matcher.GraphMatcher(activation_pattern)
+  post_activation_bypass_pattern_a = graph_matcher.OpTypePattern(
+      'Add', inputs=['*', activation_pattern])
+  post_activation_bypass_pattern_b = graph_matcher.OpTypePattern(
+      'Add', inputs=[activation_pattern, '*'])
+
+  layer_matcher = graph_matcher.GraphMatcher(
+      graph_matcher.OneofPattern([
+          post_activation_bypass_pattern_a, post_activation_bypass_pattern_b,
+          activation_pattern
+      ]))
   for match_result in layer_matcher.match_graph(graph):
     layer_op = match_result.get_op(layer_pattern)
     weight_tensor = match_result.get_tensor(weight_pattern)
@@ -203,8 +249,19 @@ def _FindLayersToQuantize(graph):
     bypass_op = match_result.get_op(bypass_pattern_a)
     if bypass_op is None:
       bypass_op = match_result.get_op(bypass_pattern_b)
+    post_activation_bypass_op = match_result.get_op(
+        post_activation_bypass_pattern_a)
+    if post_activation_bypass_op is None:
+      post_activation_bypass_op = match_result.get_op(
+          post_activation_bypass_pattern_b)
+    # If we don't find a post_activation_bypass_op but activation_op has a
+    # bypass following it, then we need to skip this match, since there will be
+    # another match that includes post_activation_bypass_op.
+    if post_activation_bypass_op is None and _HasPostActivationBypass(
+        activation_op):
+      continue
     yield _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
-                      bias_add_op)
+                      post_activation_bypass_op, bias_add_op)
 
   # Match the final layer, where there will not be an activation and instead
   # the output of the final BiasAdd must be quantized, so we treat it as the
@@ -215,19 +272,32 @@ def _FindLayersToQuantize(graph):
   for match_result in final_layer_matcher.match_graph(graph):
     layer_op = match_result.get_op(layer_pattern)
     weight_tensor = match_result.get_tensor(weight_pattern)
+    if weight_tensor is None:
+      weight_tensor = match_result.get_tensor(folded_weight_pattern)
     activation_op = match_result.get_op(bias_add_pattern)
-    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None)
+    if activation_op is None:
+      activation_op = match_result.get_op(folded_bias_add_pattern)
+    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None, None)
+
+
+def _HasPostActivationBypass(activation_op):
+  for activation_tensor in activation_op.outputs:
+    for output_op in activation_tensor.consumers():
+      if output_op.type == 'Add':
+        return True
+  return False
 
 
 class _LayerMatch(object):
   """Contains all information related to a matched Layer."""
 
   def __init__(self, layer_op, weight_tensor, activation_op, bypass_op,
-               bias_add_op):
+               post_activation_bypass_op, bias_add_op):
     self._layer_op = layer_op
     self._weight_tensor = weight_tensor
     self._activation_op = activation_op
     self._bypass_op = bypass_op
+    self._post_activation_bypass_op = post_activation_bypass_op
     self._bias_add_op = bias_add_op
 
   @property
@@ -247,6 +317,10 @@ class _LayerMatch(object):
     return self._bypass_op
 
   @property
+  def post_activation_bypass_op(self):
+    return self._post_activation_bypass_op
+
+  @property
   def bias_add_op(self):
     return self._bias_add_op
 
diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py
index ef59475167..8e60f4b661 100644
--- a/tensorflow/contrib/quantize/python/quantize_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_test.py
@@ -135,6 +135,35 @@ class QuantizeTest(test_util.TensorFlowTestCase):
     self.assertTrue('FakeQuantWithMinMaxVars' in
                     [op.type for op in bias_add_op.outputs[0].consumers()])
 
+  def testPostActivationBypassQuantized(self):
+    self._RunTestOverParameters(self._TestPostActivationBypassQuantized)
+
+  def _TestPostActivationBypassQuantized(self, is_training):
+    graph = ops.Graph()
+    with graph.as_default():
+      batch_size, height, width, depth = 5, 128, 128, 3
+      input1 = array_ops.zeros((batch_size, height, width, depth))
+      input2 = array_ops.zeros((batch_size, height / 2, width / 2, 32))
+      conv = conv2d(
+          input1,
+          32, [5, 5],
+          stride=2,
+          padding='SAME',
+          weights_initializer=self._WeightInit(0.09),
+          activation_fn=array_ops.identity,
+          scope='test/test')
+      bypass_tensor = math_ops.add(conv, input2, name='test/add')
+      _ = array_ops.identity(bypass_tensor, name='test/output')
+
+      quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)
+
+    # Ensure that the bypass node is preceded and followed by
+    # FakeQuantWithMinMaxVars operations.
+    self.assertTrue('FakeQuantWithMinMaxVars' in
+                    [c.type for c in bypass_tensor.consumers()])
+    self.assertTrue('FakeQuantWithMinMaxVars' in
+                    [i.op.type for i in bypass_tensor.op.inputs])
+
   def _WeightInit(self, stddev):
     """Returns truncated normal variable initializer.
```
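As a usage sketch of the new matcher behavior: iterating `_FindLayersToQuantize` on a graph like the one in the test above should now yield the layer once, with its `post_activation_bypass_op` set. Both of those names come from this diff; the driver loop and the `graph` variable are assumptions:

```python
# Hypothetical inspection of matcher output; assumes `graph` contains a
# post-activation bypass, e.g. built as in _TestPostActivationBypassQuantized.
from tensorflow.contrib.quantize.python import quantize

for match in quantize._FindLayersToQuantize(graph):
  # A layer whose activation feeds an Add is yielded with
  # post_activation_bypass_op set to that Add; the redundant bypass-less
  # match for the same layer is skipped via _HasPostActivationBypass.
  print(match.layer_op.name, match.post_activation_bypass_op)
```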