Diffstat (limited to 'tensorflow/contrib/tensorrt/test/test_tftrt.py')
-rw-r--r--  tensorflow/contrib/tensorrt/test/test_tftrt.py  138
1 file changed, 116 insertions(+), 22 deletions(-)
diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py
index 175ccd8006..090aa8bdb0 100644
--- a/tensorflow/contrib/tensorrt/test/test_tftrt.py
+++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py
@@ -20,6 +20,7 @@ from __future__ import print_function
import argparse
import numpy as np
+import six as _six
# normally we should do import tensorflow as tf and then
# tf.placeholder, tf.constant, tf.nn.conv2d etc but
@@ -35,10 +36,75 @@ from tensorflow.python.framework import dtypes as dtypes
from tensorflow.python.framework import importer as importer
from tensorflow.python.framework import ops as ops
from tensorflow.python.ops import array_ops as aops
+from tensorflow.python.ops import math_ops as mops
from tensorflow.python.ops import nn as nn
from tensorflow.python.ops import nn_ops as nn_ops
+def py2bytes(inp):
+  return inp
+
+
+def py3bytes(inp):
+  return inp.encode("utf-8", errors="surrogateescape")
+
+
+def py2string(inp):
+  return inp
+
+
+def py3string(inp):
+  return inp.decode("utf-8")
+
+
+if _six.PY2:
+  to_bytes = py2bytes
+  to_string = py2string
+else:
+  to_bytes = py3bytes
+  to_string = py3string
+
+
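
Note on the shims above: the RewriterConfig parameter_map ".s" entries set later in this patch are protobuf bytes fields, so Python 3 callers have to hand them bytes, while Python 2 strings already are bytes. A minimal sketch of the intended round trip (not part of the patch):

    # Holds on both Python 2 and Python 3.
    assert to_bytes("FP32") == b"FP32"
    assert to_string(b"FP32") == "FP32"
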
+def get_multi_engine_graph_def(mode="FP32"):
+  """Create a multi-engine test graph and return its graph_def."""
+  dtype = dtypes.float32
+  if mode.upper() == "FP16":
+    dtype = dtypes.float16
+
+  g = ops.Graph()
+  with g.as_default():
+    x = aops.placeholder(shape=[None, 3, 7, 5], name="input", dtype=dtype)
+    with g.name_scope("Global_scope"):
+      with g.name_scope("first_scope"):
+        e = cop.constant(
+            np.random.randn(3, 2, 3, 4), name="weights", dtype=dtype)
+        conv = nn.conv2d(
+            input=x,
+            filter=e,
+            data_format="NCHW",
+            strides=[1, 1, 1, 1],
+            padding="VALID",
+            name="conv")
+        b = cop.constant(np.random.randn(1, 4, 1, 1), name="bias1", dtype=dtype)
+        t = conv * b
+
+        b = cop.constant(np.random.randn(1, 4, 1, 1), name="bias2", dtype=dtype)
+        q = conv / b
+        edge = mops.sin(q)
+        edge1 = mops.cos(conv)
+        with g.name_scope("test_scope"):
+          de = edge + edge1
+          t -= edge1
+          q *= edge
+          t += q
+          t -= de
+    k = aops.squeeze(t, name="output")
+  print(k.dtype)
+  return g.as_graph_def()
+
+
def get_simple_graph_def():
"""Create a simple graph and return its graph_def."""
g = ops.Graph()
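
The sin/cos ops added above appear not to be handled by the contrib TensorRT converter at this point, which is presumably what splits the graph into the two engines the new --multi-engine flag refers to. A hedged sketch, not part of the patch, for checking the engine count after conversion; it assumes the trt module imported at the top of this file and that the converter emits nodes of op type "TRTEngineOp":

    converted = trt.create_inference_graph(
        input_graph_def=get_multi_engine_graph_def(),
        outputs=["output"],
        max_batch_size=2,
        max_workspace_size_bytes=1 << 25,
        precision_mode="FP32",
        minimum_segment_size=2)
    # Expect more than one engine for the multi-engine test graph.
    print("TRT engines:", len([n for n in converted.node if n.op == "TRTEngineOp"]))
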
@@ -65,7 +131,9 @@ def get_simple_graph_def():
def execute_graph(gdef, dumm_inp):
"""Run given graphdef once."""
print("executing")
- gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
+ gpu_options = None
+ if trt.trt_convert.get_linked_tensorrt_version()[0] == 3:
+ gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
sessconfig = cpb2.ConfigProto(gpu_options=gpu_options)
ops.reset_default_graph()
g = ops.Graph()
@@ -83,7 +151,9 @@ def execute_graph(gdef, dumm_inp):
# for calibration. For this test script it is random data.
def execute_calibration(gdef, dumm_inp):
"""Run given calibration graph multiple times."""
- gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
+ gpu_options = None
+ if trt.trt_convert.get_linked_tensorrt_version()[0] == 3:
+ gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
ops.reset_default_graph()
g = ops.Graph()
with g.as_default():
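
The version-guarded GPUOptions block now appears in execute_graph, execute_calibration, and again in auto() below. A small helper along these lines could remove the duplication; this is only a sketch built from calls already used in the patch (cpb2 and trt as imported by the file):

    def make_gpu_options():
      # Cap per-process GPU memory only when the linked TensorRT major
      # version is 3, mirroring the guarded blocks in this patch.
      if trt.trt_convert.get_linked_tensorrt_version()[0] == 3:
        return cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
      return None

    # Usage: cpb2.ConfigProto(gpu_options=make_gpu_options())
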
@@ -100,12 +170,17 @@ def execute_calibration(gdef, dumm_inp):
return val
-def user(run_graph=execute_graph, run_calibration=execute_calibration):
+def user(multi_engine,
+ run_graph=execute_graph,
+ run_calibration=execute_calibration):
"""Example function that converts a graph to TFTRT graph."""
-
- inp_dims = (100, 24, 24, 2)
+ if multi_engine:
+ inp_dims = (2, 3, 7, 5)
+ orig_graph = get_multi_engine_graph_def()
+ else:
+ inp_dims = (100, 24, 24, 2)
+ orig_graph = get_simple_graph_def() # use a frozen graph for inference
dummy_input = np.random.random_sample(inp_dims)
- orig_graph = get_simple_graph_def() # use a frozen graph for inference
# Get optimized graph
trt_graph = trt.create_inference_graph(
input_graph_def=orig_graph,
@@ -113,8 +188,10 @@ def user(run_graph=execute_graph, run_calibration=execute_calibration):
max_batch_size=inp_dims[0],
max_workspace_size_bytes=1 << 25,
precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
+ minimum_segment_size=2, # minimum number of nodes in an engine
+ is_dynamic_op=False,
+ maximum_cached_engines=1,
+ cached_engine_batches=[])
o1 = run_graph(orig_graph, dummy_input)
o2 = run_graph(trt_graph, dummy_input)
o3 = run_graph(trt_graph, dummy_input)
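
The three new keyword arguments are passed with their static-engine values here. For reference, a hedged sketch of a dynamic-engine variant of the same call; the argument values and comments reflect one reading of the contrib API and are not part of the patch:

    trt_graph = trt.create_inference_graph(
        input_graph_def=orig_graph,
        outputs=["output"],
        max_batch_size=inp_dims[0],
        max_workspace_size_bytes=1 << 25,
        precision_mode="FP16",
        minimum_segment_size=2,
        is_dynamic_op=True,  # build engines at run time instead of conversion time
        maximum_cached_engines=2,  # keep up to two engines per TRT op
        cached_engine_batches=[1, inp_dims[0]])  # batch sizes to cache engines for
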
@@ -126,40 +203,51 @@ def user(run_graph=execute_graph, run_calibration=execute_calibration):
max_batch_size=inp_dims[0],
max_workspace_size_bytes=1 << 25,
precision_mode="FP16", # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
+ minimum_segment_size=2, # minimum number of nodes in an engine
+ is_dynamic_op=False,
+ maximum_cached_engines=1,
+ cached_engine_batches=[])
int8_calib_gdef = trt.create_inference_graph(
input_graph_def=orig_graph,
outputs=["output"],
max_batch_size=inp_dims[0],
max_workspace_size_bytes=1 << 25,
precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
+ minimum_segment_size=2, # minimum number of nodes in an engine
+ is_dynamic_op=False,
+ maximum_cached_engines=1,
+ cached_engine_batches=[])
o4 = run_graph(fp16_graph, dummy_input)
_ = run_calibration(int8_calib_gdef, dummy_input)
int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef)
o5 = run_graph(int8_graph, dummy_input)
- assert np.allclose(o1, o4)
- assert np.allclose(o1, o5)
+ print("Is FP32 == FP16? %s (False is possible)" % np.allclose(o1, o4))
+ print("Is FP32 == INT8? %s (False is possible)" % np.allclose(o1, o5))
print("Pass")
-def auto():
+def auto(multi_engine):
"""Run the conversion as an optimization pass."""
- inp_dims = (100, 24, 24, 2)
+ if multi_engine:
+ inp_dims = (2, 3, 7, 5)
+ orig_graph = get_multi_engine_graph_def()
+ else:
+ inp_dims = (100, 24, 24, 2)
+ orig_graph = get_simple_graph_def() # use a frozen graph for inference
dummy_input = np.random.random_sample(inp_dims)
- orig_graph = get_simple_graph_def()
opt_config = rwpb2.RewriterConfig()
+ opt_config.meta_optimizer_iterations = opt_config.ONE
opt_config.optimizers.extend(["constfold", "layout"])
custom_op = opt_config.custom_optimizers.add()
custom_op.name = "TensorRTOptimizer"
custom_op.parameter_map["minimum_segment_size"].i = 3
- custom_op.parameter_map["precision_mode"].s = "FP32"
+ custom_op.parameter_map["precision_mode"].s = to_bytes("FP32")
custom_op.parameter_map["max_batch_size"].i = inp_dims[0]
custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25
print(custom_op)
- gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
+ gpu_options = None
+ if trt.trt_convert.get_linked_tensorrt_version()[0] == 3:
+ gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
graph_options = cpb2.GraphOptions(rewrite_options=opt_config)
sessconfig = cpb2.ConfigProto(
gpu_options=gpu_options, graph_options=graph_options)
@@ -168,7 +256,7 @@ def auto():
ops.reset_default_graph()
with g.as_default():
inp, out = importer.import_graph_def(
- graph_def=orig_graph, return_elements=["input", "output"])
+ graph_def=orig_graph, return_elements=["input", "output"], name="")
inp = inp.outputs[0]
out = out.outputs[0]
with csess.Session(config=sessconfig, graph=g) as sess:
@@ -186,8 +274,14 @@ if "__main__" in __name__:
action="store_true",
help="Do TRT conversion automatically",
default=False)
+ P.add_argument(
+ "--multi-engine",
+ "-m",
+ action="store_true",
+ help="Use a graph that will result in 2 engines",
+ default=False)
flags, unparsed = P.parse_known_args()
if flags.automatic:
- auto()
+ auto(flags.multi_engine)
else:
- user()
+ user(flags.multi_engine)
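
For completeness, a hedged sketch of how the new flag maps onto the two code paths; the module name is inferred from the file path, and importing it is safe because the script body is guarded by the __main__ check. Running the script with its automatic-conversion flag plus --multi-engine exercises the same path from the command line.

    import test_tftrt  # assumes the script's directory is on sys.path

    test_tftrt.auto(multi_engine=True)   # RewriterConfig/grappler pass, 2-engine graph
    test_tftrt.user(multi_engine=False)  # explicit create_inference_graph path
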