author Suharsh Sivakumar <suharshs@google.com> 2018-03-19 20:42:00 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2018-03-19 20:45:28 -0700
commit 1f4ee9d3d705a9c64af69e51e9fb5c738e145802 (patch)
tree 7a0057ae9d4dee5c517ce4eaa2a8733fdd5a6304 /tensorflow/contrib/quantize
parent 88334807a5beb8b61a967d21e534ed238e7916c0 (diff)
Quantize bypasses after activations.
PiperOrigin-RevId: 189686219
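
For context, a minimal sketch of the graph shape this change targets: a residual-style bypass whose Add consumes the activation's output. This is an illustration assuming the TF 1.x contrib APIs of this era, not code from the commit; names and shapes are made up.

    import tensorflow as tf
    from tensorflow.contrib import layers
    from tensorflow.contrib.quantize.python import quantize

    graph = tf.Graph()
    with graph.as_default():
      inputs = tf.zeros((1, 64, 64, 32))
      # conv -> bias add -> activation, built via contrib layers.
      net = layers.conv2d(inputs, 32, [3, 3], activation_fn=tf.nn.relu,
                          scope='conv')
      # A bypass *after* the activation (e.g. a ResNet-style skip connection).
      # Before this change, the output of this Add was left unquantized; with
      # it, Quantize() also inserts a FakeQuant after this op.
      net = tf.add(net, inputs, name='post_activation_bypass')
      quantize.Quantize(graph, is_training=True, weight_bits=8,
                        activation_bits=8)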
Diffstat (limited to 'tensorflow/contrib/quantize')
-rw-r--r--  tensorflow/contrib/quantize/python/quantize.py      | 84
-rw-r--r--  tensorflow/contrib/quantize/python/quantize_test.py | 29
2 files changed, 108 insertions(+), 5 deletions(-)
diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 6cc097b20e..9780e6dbcc 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -123,10 +123,47 @@ def Quantize(graph,
          vars_collection=vars_collection,
          bits=activation_bits)
+    if layer_match.post_activation_bypass_op is not None:
+      _InsertQuantOp(
+          add_context,
+          'post_activation_bypass_quant',
+          layer_match.post_activation_bypass_op,
+          input_to_ops_map.ConsumerOperations(
+              layer_match.post_activation_bypass_op),
+          is_training,
+          moving_avg=True,
+          ema_decay=ema_decay,
+          quant_delay=quant_delay,
+          vars_collection=vars_collection,
+          bits=activation_bits)
+
def _FindLayersToQuantize(graph):
"""Matches layers in graph to quantize.
+ The following patterns get matched. Nodes surrounded by [] will be
+ optionally matched:
+
+ weight|folded_weight
+ /
+ conv|fc
+ |
+ [post_conv_correction]
+ |
+ biasadd|folded_bias
+ |
+ [bypass]
+ |
+ activation
+ |
+ [post_activation_bypass]
+
+ Match replacements:
+ If weight_folded_weight is found, FakeQuant is added afterwards.
+ If bypass is found, FakeQuant is added before and after.
+ If activation is found, FakeQuant is added afterwards.
+ If post_activation_bypass is found, FakeQuant is added afterwards.
+
Args:
graph: Graph to perform match on.
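
To map the diagram onto concrete graph ops: conv|fc corresponds to Conv2D/MatMul/DepthwiseConv2dNative nodes, biasadd|folded_bias to the BiasAdd (or folded Add) that follows, both bypass nodes to plain Add ops, and activation to the types in _ACTIVATION_TYPES. A sketch of that mapping, with the exact op-type sets stated as assumptions rather than quoted from the source:

    # Assumed op-type sets behind each node class in the diagram above.
    LAYER_TYPES = {'Conv2D', 'MatMul', 'DepthwiseConv2dNative'}  # conv|fc
    BIAS_ADD_TYPES = {'BiasAdd', 'Add'}               # biasadd|folded_bias
    ACTIVATION_TYPES = {'Relu', 'Relu6', 'Identity'}  # activation
    BYPASS_TYPES = {'Add'}   # [bypass] and [post_activation_bypass]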
@@ -179,7 +216,7 @@ def _FindLayersToQuantize(graph):
              [bias_add_pattern, folded_bias_add_pattern])
      ])
-  # The input to the activation can come from bias add, fold bias add or the
+  # The input to the activation can come from bias add, fold bias add, the
   # bypasses.
  activation_pattern = graph_matcher.OpTypePattern(
      '|'.join(_ACTIVATION_TYPES),
@@ -190,7 +227,16 @@ def _FindLayersToQuantize(graph):
          ])
      ])
-  layer_matcher = graph_matcher.GraphMatcher(activation_pattern)
+  post_activation_bypass_pattern_a = graph_matcher.OpTypePattern(
+      'Add', inputs=['*', activation_pattern])
+  post_activation_bypass_pattern_b = graph_matcher.OpTypePattern(
+      'Add', inputs=[activation_pattern, '*'])
+
+  layer_matcher = graph_matcher.GraphMatcher(
+      graph_matcher.OneofPattern([
+          post_activation_bypass_pattern_a, post_activation_bypass_pattern_b,
+          activation_pattern
+      ]))
  for match_result in layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
@@ -203,8 +249,19 @@ def _FindLayersToQuantize(graph):
    bypass_op = match_result.get_op(bypass_pattern_a)
    if bypass_op is None:
      bypass_op = match_result.get_op(bypass_pattern_b)
+    post_activation_bypass_op = match_result.get_op(
+        post_activation_bypass_pattern_a)
+    if post_activation_bypass_op is None:
+      post_activation_bypass_op = match_result.get_op(
+          post_activation_bypass_pattern_b)
+    # If we don't find a post_activation_bypass_op but activation_op has a
+    # bypass following it, then we need to skip this match, since there will be
+    # another match that includes post_activation_bypass_op.
+    if post_activation_bypass_op is None and _HasPostActivationBypass(
+        activation_op):
+      continue
    yield _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
-                      bias_add_op)
+                      post_activation_bypass_op, bias_add_op)
  # Match the final layer, where there will not be an activation and instead
  # the output of the final BiasAdd must be quantized, so we treat it as the
@@ -215,19 +272,32 @@ def _FindLayersToQuantize(graph):
  for match_result in final_layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
+    if weight_tensor is None:
+      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(bias_add_pattern)
-    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None)
+    if activation_op is None:
+      activation_op = match_result.get_op(folded_bias_add_pattern)
+    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None, None)
+
+
+def _HasPostActivationBypass(activation_op):
+  for activation_tensor in activation_op.outputs:
+    for output_op in activation_tensor.consumers():
+      if output_op.type == 'Add':
+        return True
+  return False
class _LayerMatch(object):
  """Contains all information related to a matched Layer."""
  def __init__(self, layer_op, weight_tensor, activation_op, bypass_op,
-               bias_add_op):
+               post_activation_bypass_op, bias_add_op):
    self._layer_op = layer_op
    self._weight_tensor = weight_tensor
    self._activation_op = activation_op
    self._bypass_op = bypass_op
+    self._post_activation_bypass_op = post_activation_bypass_op
    self._bias_add_op = bias_add_op
  @property
@@ -247,6 +317,10 @@ class _LayerMatch(object):
    return self._bypass_op
  @property
+  def post_activation_bypass_op(self):
+    return self._post_activation_bypass_op
+
+  @property
  def bias_add_op(self):
    return self._bias_add_op
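
The pattern additions above compose this module's graph_matcher primitives. A standalone sketch of how they behave, assuming the TF 1.x contrib graph_matcher API (OpTypePattern with '|'-joined types and '*' wildcards, OneofPattern, GraphMatcher) and an illustrative two-op graph:

    import tensorflow as tf
    from tensorflow.contrib.quantize.python import graph_matcher

    graph = tf.Graph()
    with graph.as_default():
      act = tf.nn.relu(tf.zeros((1, 4)), name='act')
      tf.add(act, tf.ones((1, 4)), name='post_act_add')

    activation_pattern = graph_matcher.OpTypePattern('Relu|Relu6|Identity')
    # Two Add patterns are needed because the activation may feed either
    # input of the bypass Add.
    post_a = graph_matcher.OpTypePattern('Add', inputs=['*', activation_pattern])
    post_b = graph_matcher.OpTypePattern('Add', inputs=[activation_pattern, '*'])

    matcher = graph_matcher.GraphMatcher(
        graph_matcher.OneofPattern([post_a, post_b, activation_pattern]))
    for match in matcher.match_graph(graph):
      # get_op() returns None for the branches that did not match.
      post_op = match.get_op(post_a) or match.get_op(post_b)
      print(match.get_op(activation_pattern).name,
            post_op.name if post_op else None)

Note that the same activation can surface twice, once bare and once nested inside the Add match, which is exactly why the commit adds _HasPostActivationBypass and skips the bare match when a bypass follows.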
diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py
index ef59475167..8e60f4b661 100644
--- a/tensorflow/contrib/quantize/python/quantize_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_test.py
@@ -135,6 +135,35 @@ class QuantizeTest(test_util.TensorFlowTestCase):
    self.assertTrue('FakeQuantWithMinMaxVars' in
                    [op.type for op in bias_add_op.outputs[0].consumers()])
+  def testPostActivationBypassQuantized(self):
+    self._RunTestOverParameters(self._TestPostActivationBypassQuantized)
+
+  def _TestPostActivationBypassQuantized(self, is_training):
+    graph = ops.Graph()
+    with graph.as_default():
+      batch_size, height, width, depth = 5, 128, 128, 3
+      input1 = array_ops.zeros((batch_size, height, width, depth))
+      input2 = array_ops.zeros((batch_size, height / 2, width / 2, 32))
+      conv = conv2d(
+          input1,
+          32, [5, 5],
+          stride=2,
+          padding='SAME',
+          weights_initializer=self._WeightInit(0.09),
+          activation_fn=array_ops.identity,
+          scope='test/test')
+      bypass_tensor = math_ops.add(conv, input2, name='test/add')
+      _ = array_ops.identity(bypass_tensor, name='test/output')
+
+      quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)
+
+      # Ensure that the bypass node is preceded and followed by
+      # FakeQuantWithMinMaxVars operations.
+      self.assertTrue('FakeQuantWithMinMaxVars' in
+                      [c.type for c in bypass_tensor.consumers()])
+      self.assertTrue('FakeQuantWithMinMaxVars' in
+                      [i.op.type for i in bypass_tensor.op.inputs])
+
  def _WeightInit(self, stddev):
    """Returns truncated normal variable initializer.