Diffstat (limited to 'tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py')
-rw-r--r--  tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py  401
1 file changed, 300 insertions(+), 101 deletions(-)
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
index 0403b652d7..d9c41f90d0 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
@@ -18,131 +18,330 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from collections import namedtuple
+import itertools
import warnings
import numpy as np
+import six
from tensorflow.contrib import tensorrt as trt
-from tensorflow.core.protobuf import config_pb2 as cpb2
-from tensorflow.python.framework import constant_op as cop
-from tensorflow.python.framework import dtypes as dtypes
-from tensorflow.python.framework import importer as importer
-from tensorflow.python.framework import ops as ops
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import importer
+from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops as aops
-from tensorflow.python.ops import nn as nn
-from tensorflow.python.ops import nn_ops as nn_ops
-from tensorflow.python.platform import googletest
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import test
+INPUT_NAME = "input"
+OUTPUT_NAME = "output"
+INPUT_DIMS = [100, 24, 24, 2]
+MODE_FP32 = "FP32"
+MODE_FP16 = "FP16"
+MODE_INT8 = "INT8"
-class IntegrationTest(test_util.TensorFlowTestCase):
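+# Attribute values in NodeDef/AttrValue protos (e.g. precision_mode below) are
+# bytes, so these helpers normalize between str and bytes on Python 2 and 3.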
+if six.PY2:
+ to_bytes = lambda s: s
+ to_string = lambda s: s
+else:
+ to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape")
+ to_string = lambda s: s.decode("utf-8")
+
+
+# TODO(aaroey): test graph with different dtypes.
+def GetSingleEngineGraphDef(dtype=dtypes.float32):
+ """Create a graph containing single segment."""
+ g = ops.Graph()
+ with g.as_default():
+ inp = array_ops.placeholder(
+ dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME)
+ with g.device("/GPU:0"):
+ conv_filter = constant_op.constant(
+ [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
+ name="weights",
+ dtype=dtype)
+ conv = nn.conv2d(
+ input=inp,
+ filter=conv_filter,
+ strides=[1, 2, 2, 1],
+ padding="SAME",
+ name="conv")
+ bias = constant_op.constant(
+ [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
+ added = nn.bias_add(conv, bias, name="bias_add")
+ relu = nn.relu(added, "relu")
+ identity = array_ops.identity(relu, "identity")
+ pool = nn_ops.max_pool(
+ identity, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
+ array_ops.squeeze(pool, name=OUTPUT_NAME)
+ return g.as_graph_def()
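+# For reference: with INPUT_DIMS = [100, 24, 24, 2], the 1x1 conv (stride 2,
+# SAME) yields [100, 12, 12, 6] and the 2x2 max_pool (stride 2, VALID) yields
+# [100, 6, 6, 6], matching expected_output_dims in TEST_GRAPHS below.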
+
+
+# TODO(aaroey): test graph with different dtypes.
+def GetMultiEngineGraphDef(dtype=dtypes.float32):
+ """Create a graph containing multiple segment."""
+ g = ops.Graph()
+ with g.as_default():
+ inp = array_ops.placeholder(
+ dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME)
+ with g.device("/GPU:0"):
+ conv_filter = constant_op.constant(
+ [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
+ name="weights",
+ dtype=dtype)
+ conv = nn.conv2d(
+ input=inp,
+ filter=conv_filter,
+ strides=[1, 2, 2, 1],
+ padding="SAME",
+ name="conv")
+ c1 = constant_op.constant(
+ np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype)
+ p = conv * c1
+ c2 = constant_op.constant(
+ np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype)
+ q = conv / c2
+
+ edge = math_ops.sin(q)
+ edge /= edge
+ r = edge + edge
+
+ p -= edge
+ q *= edge
+ s = p + q
+ s -= r
+ array_ops.squeeze(s, name=OUTPUT_NAME)
+ return g.as_graph_def()
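+# Note: `edge` is built from math_ops.sin, which the contrib TF-TRT segmenter
+# (presumably) cannot convert, so it acts as a boundary that splits the graph
+# into the two expected TensorRT segments (num_expected_engines=2 below).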
+
+
+TestGraph = namedtuple("TestGraph",
+ ["gdef", "num_expected_engines", "expected_output_dims"])
+
+TEST_GRAPHS = {
+ "SingleEngineGraph":
+ TestGraph(
+ gdef=GetSingleEngineGraphDef(),
+ num_expected_engines=1,
+ expected_output_dims=(100, 6, 6, 6)),
+ "MultiEngineGraph":
+ TestGraph(
+ gdef=GetMultiEngineGraphDef(),
+ num_expected_engines=2,
+ expected_output_dims=(100, 12, 12, 6)),
+ # TODO(aaroey): add a large complex graph to test.
+}
+
+
+class TfTrtIntegrationTest(test_util.TensorFlowTestCase):
"""Class to test Tensorflow-TensorRT integration."""
def setUp(self):
"""Setup method."""
- super(IntegrationTest, self).setUp()
+ super(TfTrtIntegrationTest, self).setUp()
warnings.simplefilter("always")
- inp_dims = (100, 24, 24, 2)
- self._input = np.random.random_sample(inp_dims)
- self._original_graph = self.get_simple_graph_def()
- self._gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
- self._config = cpb2.ConfigProto(gpu_options=self._gpu_options)
- self._reference = self.run_graph(self._original_graph, self._input)
-
- def get_simple_graph_def(self):
- """Create a simple graph and return its graph_def."""
- g = ops.Graph()
- with g.as_default():
- a = aops.placeholder(
- dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input")
- e = cop.constant(
- [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
- name="weights",
- dtype=dtypes.float32)
- conv = nn.conv2d(
- input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv")
- b = cop.constant(
- [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32)
- t = nn.bias_add(conv, b, name="biasAdd")
- relu = nn.relu(t, "relu")
- idty = aops.identity(relu, "ID")
- v = nn_ops.max_pool(
- idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
- aops.squeeze(v, name="output")
- return g.as_graph_def()
-
- def run_graph(self, gdef, dumm_inp):
- """Run given graphdef once."""
- ops.reset_default_graph()
+ self._input = np.random.random_sample(INPUT_DIMS)
+
+ def _GetConfigProto(self,
+ use_optimizer,
+ precision_mode=None,
+ is_dynamic_op=None):
+ if use_optimizer:
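+      # Online path: run conversion as a Grappler custom optimizer, with the
+      # same knobs that _GetTrtGraph passes to create_inference_graph (except
+      # minimum_segment_size, which is 3 here vs. 2 there).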
+ rewriter_cfg = rewriter_config_pb2.RewriterConfig()
+ rewriter_cfg.optimizers.extend(["constfold", "layout"])
+ custom_op = rewriter_cfg.custom_optimizers.add()
+ custom_op.name = "TensorRTOptimizer"
+ custom_op.parameter_map["minimum_segment_size"].i = 3
+ custom_op.parameter_map["max_batch_size"].i = self._input.shape[0]
+ custom_op.parameter_map["is_dynamic_op"].b = is_dynamic_op
+ custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25
+ custom_op.parameter_map["precision_mode"].s = to_bytes(precision_mode)
+ graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_cfg)
+ else:
+ graph_options = config_pb2.GraphOptions()
+
+ gpu_options = config_pb2.GPUOptions()
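+    # TensorRT allocates GPU memory outside of the TF allocator; capping TF
+    # at half the device memory is presumably to leave headroom for engine
+    # creation under the TRT 3 API.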
+ if trt.trt_convert.get_linked_tensorrt_version()[0] == 3:
+ gpu_options.per_process_gpu_memory_fraction = 0.50
+
+ config = config_pb2.ConfigProto(
+ gpu_options=gpu_options, graph_options=graph_options)
+ return config
+
+ def _RunGraph(self, graph_key, gdef, input_data, config, num_runs=2):
+ """Run given graphdef multiple times."""
g = ops.Graph()
with g.as_default():
inp, out = importer.import_graph_def(
- graph_def=gdef, return_elements=["input", "output"])
+ graph_def=gdef, return_elements=[INPUT_NAME, OUTPUT_NAME], name="")
inp = inp.outputs[0]
out = out.outputs[0]
with self.test_session(
- graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess:
- val = sess.run(out, {inp: dumm_inp})
+ graph=g, config=config, use_gpu=True, force_gpu=True) as sess:
+ val = None
+        # Defaults to 2 runs to verify that the result is the same across runs.
+ for _ in range(num_runs):
+ new_val = sess.run(out, {inp: input_data})
+          self.assertEqual(TEST_GRAPHS[graph_key].expected_output_dims,
+                           new_val.shape)
+ if val is not None:
+ self.assertAllEqual(new_val, val)
+ val = new_val
return val
# Use real data that is representative of the inference dataset
# for calibration. For this test script it is random data.
- def run_calibration(self, gdef, dumm_inp):
- """Run given calibration graph multiple times."""
- ops.reset_default_graph()
- g = ops.Graph()
- with g.as_default():
- inp, out = importer.import_graph_def(
- graph_def=gdef, return_elements=["input", "output"])
- inp = inp.outputs[0]
- out = out.outputs[0]
- # run over real calibration data here, we are mimicking a calibration
- # set of 30 different batches. Use as much calibration data as you want
- with self.test_session(
- graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess:
- for _ in range(30):
- val = sess.run(out, {inp: dumm_inp})
- return val
+ def _RunCalibration(self, graph_key, gdef, input_data, config):
+ """Run calibration on given graph."""
+ return self._RunGraph(graph_key, gdef, input_data, config, 30)
- def get_trt_graph(self, mode):
+ def _GetTrtGraph(self, gdef, precision_mode, is_dynamic_op):
"""Return trt converted graph."""
- if mode in ["FP32", "FP16", "INT8"]:
- return trt.create_inference_graph(
- input_graph_def=self._original_graph,
- outputs=["output"],
- max_batch_size=self._input.shape[0],
- max_workspace_size_bytes=1 << 25,
- precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
- return None
-
- def testFP32(self):
- """Test FP32 conversion. Results should be identical to native case."""
- trt_graph = self.get_trt_graph("FP32")
- result = self.run_graph(trt_graph, self._input)
- self.assertAllEqual(self._reference, result)
- result1 = self.run_graph(trt_graph, self._input)
- self.assertAllEqual(result1, result)
-
- def testFP16(self):
- """Test FP16 conversion. Results may be different from native case."""
- trt_graph = self.get_trt_graph("FP16")
- result = self.run_graph(trt_graph, self._input)
- self.assertAllClose(self._reference, result, rtol=1.e-03)
- result1 = self.run_graph(trt_graph, self._input)
- self.assertAllEqual(result1, result)
-
- def testINT8(self):
- """Test INT8 conversion. Results may be different from native case."""
- calib_graph = self.get_trt_graph("INT8")
- result = self.run_calibration(calib_graph, self._input)
- self.assertAllEqual(self._reference, result)
- int8_graph = trt.calib_graph_to_infer_graph(calib_graph)
- result = self.run_graph(int8_graph, self._input)
- self.assertAllClose(self._reference, result, rtol=1.e-03)
- result1 = self.run_graph(int8_graph, self._input)
- self.assertAllEqual(result1, result)
+ return trt.create_inference_graph(
+ input_graph_def=gdef,
+ outputs=[OUTPUT_NAME],
+ max_batch_size=self._input.shape[0],
+ max_workspace_size_bytes=1 << 25,
+ precision_mode=precision_mode,
+ minimum_segment_size=2,
+ is_dynamic_op=is_dynamic_op)
+
+ def _VerifyGraphDef(self,
+ graph_key,
+ gdef,
+ precision_mode=None,
+ is_calibrated=None,
+ dynamic_engine=None):
+ num_engines = 0
+ for n in gdef.node:
+ if n.op == "TRTEngineOp":
+ num_engines += 1
+ self.assertNotEqual("", n.attr["serialized_segment"].s)
+ self.assertNotEqual("", n.attr["segment_funcdef_name"].s)
+ self.assertEquals(n.attr["precision_mode"].s, precision_mode)
+ self.assertEquals(n.attr["static_engine"].b, not dynamic_engine)
+ if precision_mode == MODE_INT8 and is_calibrated:
+ self.assertNotEqual("", n.attr["calibration_data"].s)
+ else:
+ self.assertEquals("", n.attr["calibration_data"].s)
+ if precision_mode is None:
+      self.assertEqual(num_engines, 0)
+ else:
+      self.assertEqual(num_engines,
+                       TEST_GRAPHS[graph_key].num_expected_engines)
+
+ def _RunTest(self, graph_key, use_optimizer, precision_mode,
+ dynamic_infer_engine, dynamic_calib_engine):
+ assert precision_mode in [MODE_FP32, MODE_FP16, MODE_INT8]
+ input_gdef = TEST_GRAPHS[graph_key].gdef
+ self._VerifyGraphDef(graph_key, input_gdef)
+
+ # Get reference result without running trt.
+ config_no_trt = self._GetConfigProto(False)
+ print("Running original graph w/o trt, config:\n%s" % str(config_no_trt))
+ ref_result = self._RunGraph(graph_key, input_gdef, self._input,
+ config_no_trt)
+
+ # Run calibration if necessary.
+ if precision_mode == MODE_INT8:
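+      # INT8 needs a calibration pass: run the calibration graph on
+      # representative batches, then convert it to an inference graph with
+      # calib_graph_to_infer_graph.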
+ calib_config = self._GetConfigProto(use_optimizer, precision_mode,
+ dynamic_calib_engine)
+ print("Running calibration graph, config:\n%s" % str(calib_config))
+ if use_optimizer:
+        self.fail()
+ # TODO(aaroey): uncomment this and get infer_gdef when this mode is
+ # supported.
+ # result = self._RunCalibration(graph_key, input_gdef, self._input,
+ # calib_config)
+ else:
+ calib_gdef = self._GetTrtGraph(input_gdef, precision_mode,
+ dynamic_calib_engine)
+ self._VerifyGraphDef(graph_key, calib_gdef, precision_mode, False,
+ dynamic_calib_engine)
+ result = self._RunCalibration(graph_key, calib_gdef, self._input,
+ calib_config)
+ infer_gdef = trt.calib_graph_to_infer_graph(calib_gdef)
+ self._VerifyGraphDef(graph_key, infer_gdef, precision_mode, True,
+ dynamic_calib_engine)
+ self.assertAllClose(ref_result, result, rtol=1.e-03)
+ else:
+ infer_gdef = input_gdef
+
+ # Run inference.
+ infer_config = self._GetConfigProto(use_optimizer, precision_mode,
+ dynamic_infer_engine)
+ print("Running final inference graph, config:\n%s" % str(infer_config))
+ if use_optimizer:
+ result = self._RunGraph(graph_key, infer_gdef, self._input, infer_config)
+ else:
+ trt_infer_gdef = self._GetTrtGraph(infer_gdef, precision_mode,
+ dynamic_infer_engine)
+ self._VerifyGraphDef(graph_key, trt_infer_gdef, precision_mode, True,
+ dynamic_infer_engine)
+ result = self._RunGraph(graph_key, trt_infer_gdef, self._input,
+ infer_config)
+ self.assertAllClose(ref_result, result, rtol=1.e-03)
+
+ def testIdempotence(self):
+    # Test that applying the tensorrt optimizer or offline conversion tools
+    # multiple times to the same graph results in the same graph.
+ # TODO(aaroey): implement this.
+ pass
+
+
+def GetTests():
+
+ def _GetTest(g, u, p, i, c):
+
+ def _Test(self):
+ print("Running test with parameters: graph_key=%s, use_optimizer=%s, "
+ "precision_mode=%s, dynamic_infer_engine=%s, "
+ "dynamic_calib_engine=%s" % (g, u, p, i, c))
+ self._RunTest(g, u, p, i, c)
+
+ return _Test
+
+ use_optimizer_options = [False, True]
+ precision_mode_options = [MODE_FP32, MODE_FP16, MODE_INT8]
+ dynamic_infer_engine_options = [False, True]
+ dynamic_calib_engine_options = [False, True]
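+  # Full grid: 2 graphs x 2 optimizer settings x 3 precisions x 2 x 2 engine
+  # modes = 48 combinations, minus the unsupported cases skipped below.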
+ for (graph_key, use_optimizer, precision_mode,
+ dynamic_infer_engine, dynamic_calib_engine) in itertools.product(
+ TEST_GRAPHS, use_optimizer_options, precision_mode_options,
+ dynamic_infer_engine_options, dynamic_calib_engine_options):
+    if precision_mode == MODE_INT8:
+      if not dynamic_calib_engine and dynamic_infer_engine:
+        # TODO(aaroey): test this case; the conversion from a static
+        # calibration engine to a dynamic inference engine should be a no-op.
+        continue
+      if use_optimizer:
+        # TODO(aaroey): if use_optimizer is True we need to get the inference
+        # graphdef using a custom python wrapper class, which is not yet
+        # supported.
+        continue
+      if not dynamic_calib_engine:
+        # TODO(aaroey): construction of a static calibration engine is not
+        # yet supported.
+        continue
+      if dynamic_calib_engine and not dynamic_infer_engine:
+        # TODO(aaroey): construction of a static inference engine using a
+        # dynamic calibration engine is not yet supported.
+        continue
+    else:  # In non-INT8 mode.
+      if dynamic_calib_engine:
+        # dynamic_calib_engine doesn't affect non-INT8 modes, so run these
+        # tests only with dynamic_calib_engine=False.
+        continue
+ yield _GetTest(graph_key, use_optimizer, precision_mode,
+ dynamic_infer_engine, dynamic_calib_engine)
if __name__ == "__main__":
- googletest.main()
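+  # Tests are generated dynamically so each parameter combination shows up as
+  # its own test method (testTfTRT_0, testTfTRT_1, ...).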
+ for index, t in enumerate(GetTests()):
+ setattr(TfTrtIntegrationTest, "testTfTRT_" + str(index), t)
+ test.main()