author | gracehoney <31743510+aaroey@users.noreply.github.com> | 2018-07-30 12:04:22 -0700
---|---|---
committer | gracehoney <31743510+aaroey@users.noreply.github.com> | 2018-07-30 12:04:22 -0700
commit | e8e2cc72f3367aee1789dc0f5bcbd8f027c7180f (patch) |
tree | 703cd2a889e68ba8a1e7fa9fc26ebbb8200fb38d /tensorflow/contrib/tensorrt |
parent | 4158295eef9489610ddcbfa8ba3d8bda43e65194 (diff) |
Add more tests
Diffstat (limited to 'tensorflow/contrib/tensorrt')
7 files changed, 152 insertions, 64 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 1e6300578d..e06704f5d1 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -326,32 +326,12 @@ tensorflow::Status GetEngineInfo(
       }
       VLOG(1) << "Adding const node " << input_node->name();
       QCHECK(subgraph_node_names.insert(input_node->name()).second);
-#if 1
       // Since we duplicate the const input node in both the segment graphdef
       // and the engine, the segment node doesn't depend on it anymore, so we
       // add a control dependency instead.
       info->connections.emplace_back(
           input_node->name(), input_node->id(), node_name, node_id,
           /*input_edge=*/true);
-#else
-      // Add control inputs to the const node as control input connections to
-      // the engine.
-      for (const auto const_in_edge : input_node->in_edges()) {
-        QCHECK(const_in_edge->IsControlEdge());  // Must be control edge.
-        auto const_in_node = const_in_edge->src();
-        QCHECK(!segment_nodes.count(const_in_node->name()))
-            << "Loop found between segment and non-segment nodes, from "
-               "segment node "
-            << const_in_node->name() << " to non-segment node "
-            << input_node->name() << " to segment node " << node->name();
-        if (const_in_node->IsSource()) continue;
-        VLOG(1) << "Control edge from node " << const_in_node->name()
-                << " to " << input_node->name();
-        info->connections.emplace_back(
-            const_in_node->name(), const_in_node->id(), input_node->name(),
-            input_node->id(), /*input_edge=*/true);
-      }
-#endif
     } else {
       // Non-const data input.
       int port = Graph::kControlSlot - 1;
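The comment kept in the hunk above carries the key idea: the const input is duplicated into both the segment graphdef and the engine, so the cross-boundary edge only needs to enforce ordering, not carry data. A minimal Python sketch of that bookkeeping, using invented `Connection`/`connect_const_input` names rather than the real TF-TRT types:

```python
# Hypothetical sketch, not the TF-TRT API. The /*input_edge=*/true argument in
# the C++ corresponds to the `is_input_edge` field here.
from collections import namedtuple

Connection = namedtuple(
    "Connection",
    ["outside_node", "outside_id", "inside_node", "inside_id", "is_input_edge"])

def connect_const_input(connections, const_name, const_id, seg_name, seg_id):
  # The const's value already lives inside the engine (it was duplicated into
  # the segment graphdef), so this connection effectively acts as a control
  # dependency rather than a tensor input.
  connections.append(Connection(const_name, const_id, seg_name, seg_id, True))

connections = []
connect_const_input(connections, "c", 7, "add", 12)
print(connections[0])
```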
diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
index 044c736c03..f33f2cc4d6 100644
--- a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
+++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/stacktrace.h"
 
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
@@ -189,9 +190,6 @@ tensorflow::Status TRTOptimizationPass::Optimize(
     tensorflow::grappler::Cluster* cluster,
     const tensorflow::grappler::GrapplerItem& item, GraphDef* optimized_graph) {
   VLOG(1) << "Called TRTOptimization Pass " << name_;
-  if (VLOG_IS_ON(1)) {
-    PrintDebugInfo(cluster, item);
-  }
   // This is a hack to workaround optimizer issue. MetaOptimizer calls
   // optimization passes on function objects as well, we should not modify
   // generated funcdefs! This is fragile but we don't have any other option
@@ -203,6 +201,10 @@ tensorflow::Status TRTOptimizationPass::Optimize(
     *optimized_graph = item.graph;
     return tensorflow::Status::OK();
   }
+  if (VLOG_IS_ON(1)) {
+    VLOG(2) << CurrentStackTrace();
+    PrintDebugInfo(cluster, item);
+  }
   int max_dim = -1;
   if (item.feed.size()) {
     for (const auto& f : item.feed) {
diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc
index 008fffc954..e1ed7ebf6c 100644
--- a/tensorflow/contrib/tensorrt/segment/segment.cc
+++ b/tensorflow/contrib/tensorrt/segment/segment.cc
@@ -558,27 +558,36 @@ tensorflow::Status SegmentGraph(
     // then after doing this operation the resulting subgraph will keep the
     // same properties 1 and 2.
     //
-    // For simplicity we use heuristics: for input nodes remove all its
-    // input, for output nodes remove all its output. In this way, for common
-    // cases the number of removed nodes should be minimum.
+    // For simplicity we use heuristics: for input and const output nodes
+    // remove all their inputs, and for non-const output nodes remove all
+    // their outputs. In this way, for common cases the number of removed
+    // nodes should be minimum.
     auto remove_nodes = [&segment_nodes](
                             bool is_input_nodes,
                             std::deque<const tensorflow::Node*>* que) {
       // Run a BFS on the queue to find all the input/output nodes.
       std::set<const tensorflow::Node*> visited;
+      std::set<const tensorflow::Node*> logged(que->begin(), que->end());
       while (!que->empty()) {
         auto node = que->front();
         que->pop_front();
         if (!visited.insert(node).second) continue;
         segment_nodes.erase(node);
-        for (auto in :
-             is_input_nodes ? node->in_nodes() : node->out_nodes()) {
+        for (auto in : (is_input_nodes || node->type_string() == "Const")
+                           ? node->in_nodes()
+                           : node->out_nodes()) {
           if (segment_nodes.count(in)) {
             que->push_back(in);
-            VLOG(2) << "Need to remove node " << in->name()
-                    << " because one of its "
-                    << (is_input_nodes ? "output" : "input")
-                    << " nodes in the graph was removed: " << node->name();
+            if (VLOG_IS_ON(2)) {
+              if (!logged.count(in)) {
+                VLOG(2) << "----> Need to remove node " << in->name()
+                        << " because one of its "
+                        << (is_input_nodes ? "output" : "input")
+                        << " nodes in the graph was removed: "
+                        << node->name();
+                logged.insert(in);
+              }
+            }
           }
         }
       }
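The rewritten comment in segment.cc describes the pruning heuristic: when evicting nodes from a candidate segment, walk the *inputs* of input nodes and of const output nodes, and the *outputs* of everything else. A toy sketch of that BFS in plain Python (the `{name: (op_type, in_nodes, out_nodes)}` graph encoding is invented for illustration, not TF's):

```python
from collections import deque

def remove_nodes(segment, graph, start, is_input_nodes):
  """Evict `start` nodes from `segment`, cascading per the heuristic above."""
  que, visited = deque(start), set()
  while que:
    node = que.popleft()
    if node in visited:
      continue
    visited.add(node)
    segment.discard(node)
    op_type, in_nodes, out_nodes = graph[node]
    # Input nodes and const output nodes cascade upstream; other output
    # nodes cascade downstream -- mirroring the ternary in the C++ change.
    neighbors = in_nodes if (is_input_nodes or op_type == "Const") else out_nodes
    for n in neighbors:
      if n in segment:
        que.append(n)  # removing `node` invalidates `n` as well

# Example: "add" feeds "mul"; evicting output node "add" also drops "mul".
graph = {
    "add": ("Add", [], ["mul"]),
    "mul": ("Mul", ["add"], []),
}
segment = {"add", "mul"}
remove_nodes(segment, graph, ["add"], is_input_nodes=False)
print(segment)  # set()
```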
diff --git a/tensorflow/contrib/tensorrt/test/base_test.py b/tensorflow/contrib/tensorrt/test/base_test.py
index 9d14e635f4..e765ae3661 100644
--- a/tensorflow/contrib/tensorrt/test/base_test.py
+++ b/tensorflow/contrib/tensorrt/test/base_test.py
@@ -234,5 +234,109 @@ class ConstInputTest(trt_test.TfTrtIntegrationTestBase):
         allclose_atol=1.e-06,
         allclose_rtol=1.e-06)
 
+
+class ConstDataInputSingleEngineTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    input_name = "input"
+    input_dims = [2, 32, 32, 3]
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtypes.float32, shape=input_dims, name=input_name)
+      with g.device("/GPU:0"):
+        n = inp
+        c = constant_op.constant(1.0, name="c")
+        n = math_ops.add(n, c, name="add")
+        n = math_ops.mul(n, n, name="mul")
+        n = math_ops.add(n, n, name="add1")
+      array_ops.squeeze(n, name=self.output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        expected_engines={"my_trt_op_0": ["c", "add", "add1", "mul"]},
+        expected_output_dims=tuple(input_dims),
+        allclose_atol=1.e-06,
+        allclose_rtol=1.e-06)
+
+
+class ConstDataInputMultipleEnginesTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing multiple segment."""
+    input_name = "input"
+    input_dims = [2, 32, 32, 3]
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtypes.float32, shape=input_dims, name=input_name)
+      with g.device("/GPU:0"):
+        n = inp
+        c = constant_op.constant(1.0, name="c")
+        n = math_ops.add(n, c, name="add")
+        n = math_ops.mul(n, n, name="mul")
+        n = math_ops.add(n, n, name="add1")
+        n = self.trt_incompatible_op(n, name="incompatible1")
+        n = math_ops.add(n, c, name="add2")
+        n = math_ops.mul(n, n, name="mul1")
+        n = math_ops.add(n, n, name="add3")
+      array_ops.squeeze(n, name=self.output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        expected_engines={
+            "my_trt_op_0": ["add2", "add3", "mul1"],
+            "my_trt_op_1": ["add", "add1", "mul"]
+        },
+        expected_output_dims=tuple(input_dims),
+        allclose_atol=1.e-06,
+        allclose_rtol=1.e-06)
+
+
+class ControlDependencyTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing multiple segment."""
+    input_name = "input"
+    input_dims = [2, 32, 32, 3]
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtypes.float32, shape=input_dims, name=input_name)
+      with g.device("/GPU:0"):
+        c1 = constant_op.constant(1.0, name="c1")
+        c2 = constant_op.constant(1.0, name="c2")
+        d1 = constant_op.constant(1.0, name="d1")
+        d2 = self.trt_incompatible_op(inp, name="d2")
+        with g.control_dependencies([d1, d2]):
+          add = math_ops.add(inp, c1, name="add")
+        with g.control_dependencies([d1, d2]):
+          mul = math_ops.mul(add, add, name="mul")
+        with g.control_dependencies([d1, d2]):
+          add1 = math_ops.add(mul, mul, name="add1")
+        edge = self.trt_incompatible_op(add1, name="incompatible")
+        with g.control_dependencies([d1, d2, add, mul]):
+          add2 = math_ops.add(edge, c2, name="add2")
+        with g.control_dependencies([d1, d2, add1, mul]):
+          mul1 = math_ops.mul(add2, add2, name="mul1")
+        with g.control_dependencies([d1, d2, add, add1]):
+          add3 = math_ops.add(mul1, mul1, name="add3")
+      array_ops.squeeze(add3, name=self.output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        expected_engines={
+            "my_trt_op_0": ["c1", "add", "add1", "mul"],
+            "my_trt_op_1": ["c2", "add2", "add3", "mul1"]
+        },
+        expected_output_dims=tuple(input_dims),
+        allclose_atol=1.e-06,
+        allclose_rtol=1.e-06)
+
+
 if __name__ == "__main__":
   test.main()
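These tests switch `expected_engines` from a plain count or name list to a dict. A minimal sketch (an assumed helper, not the harness's actual verification code) of what the dict form expresses — each key names a TRT engine op that must appear in the converted graph, and its value lists the original nodes that engine was built from:

```python
# Assumed helper for illustration; `gdef` is a converted tf.GraphDef.
def engine_ops(gdef):
  """Return the names of TRTEngineOp nodes in a converted GraphDef."""
  return {n.name for n in gdef.node if n.op == "TRTEngineOp"}

def check_expected_engines(gdef, expected_engines):
  # Keys of the dict are the engines the conversion is expected to build.
  missing = set(expected_engines) - engine_ops(gdef)
  assert not missing, "engines not built: %s" % sorted(missing)
```

Note that a const like `"c"` can appear in an engine's node list and still survive at the top level, since the convert_graph.cc change above duplicates const inputs into the engine rather than moving them.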
diff --git a/tensorflow/contrib/tensorrt/test/memory_alignment_test.py b/tensorflow/contrib/tensorrt/test/memory_alignment_test.py
index 3dd95c6f62..66eb6be757 100644
--- a/tensorflow/contrib/tensorrt/test/memory_alignment_test.py
+++ b/tensorflow/contrib/tensorrt/test/memory_alignment_test.py
@@ -62,7 +62,7 @@ class MemoryAlignmentTest(trt_test.TfTrtIntegrationTestBase):
         gdef=g.as_graph_def(),
         input_names=[input_name],
         input_dims=[input_dims],
-        num_expected_engines=1,
+        expected_engines=["my_trt_op_0"],
         expected_output_dims=(2, 15, 15, 10),
         allclose_atol=1.e-02,
         allclose_rtol=1.e-02)
diff --git a/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py b/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py
index 97e0d23b18..51c905a50b 100644
--- a/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py
+++ b/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py
@@ -25,7 +25,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.platform import test
 
@@ -51,15 +51,18 @@ class NeighboringEngineTest(trt_test.TfTrtIntegrationTestBase):
             name="conv")
         b = constant_op.constant(
             np.random.normal(1.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
-        t = conv * b
-        e = gen_math_ops.tan(conv)
-        t = t - e
+        t = math_ops.mul(conv, b, name="mul")
+        e = self.trt_incompatible_op(conv, name="incompatible")
+        t = math_ops.sub(t, e, name="sub")
       array_ops.squeeze(t, name=self.output_name)
     return trt_test.TfTrtIntegrationTestParams(
         gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
-        expected_engines=["my_trt_op_0", "my_trt_op_1"],
+        expected_engines={
+            "my_trt_op_0": ["bias", "mul", "sub"],
+            "my_trt_op_1": ["weights", "conv"]
+        },
         expected_output_dims=(2, 4, 5, 4),
         allclose_atol=1.e-03,
         allclose_rtol=1.e-03)
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
index 5968af28ae..a35facaf12 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
@@ -23,6 +23,7 @@ import itertools
 import warnings
 import numpy as np
 import six
+import os
 
 from tensorflow.contrib.tensorrt.python import trt_convert
 # pylint: disable=unused-import
@@ -151,7 +152,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
       rewriter_cfg.optimizers.extend(["constfold", "layout"])
       custom_op = rewriter_cfg.custom_optimizers.add()
       custom_op.name = "TensorRTOptimizer"
-      custom_op.parameter_map["minimum_segment_size"].i = 3
+      custom_op.parameter_map["minimum_segment_size"].i = 2
       custom_op.parameter_map["max_batch_size"].i = max(
           [dims[0] for dims in params.input_dims])
       custom_op.parameter_map["is_dynamic_op"].b = run_params.dynamic_engine
@@ -162,23 +163,6 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
     else:
       graph_options = config_pb2.GraphOptions()
 
-    # Disable all other optimizations which can affect the converted graph.
-    off = rewriter_config_pb2.RewriterConfig.OFF
-    graph_options.optimizer_options.opt_level = config_pb2.OptimizerOptions.L0
-    graph_options.rewrite_options.layout_optimizer = off
-    graph_options.rewrite_options.constant_folding = off
-    graph_options.rewrite_options.shape_optimization = off
-    graph_options.rewrite_options.remapping = off
-    graph_options.rewrite_options.arithmetic_optimization = off
-    graph_options.rewrite_options.dependency_optimization = off
-    graph_options.rewrite_options.loop_optimization = off
-    graph_options.rewrite_options.function_optimization = off
-    graph_options.rewrite_options.debug_stripper = off
-    graph_options.rewrite_options.disable_model_pruning = True
-    graph_options.rewrite_options.scoped_allocator_optimization = off
-    graph_options.rewrite_options.memory_optimization = (
-        rewriter_config_pb2.RewriterConfig.NO_MEM_OPT)
-
     gpu_options = config_pb2.GPUOptions()
     gpu_options.allow_growth = True
     if trt_convert.get_linked_tensorrt_version()[0] == 3:
@@ -188,9 +172,14 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
         gpu_options=gpu_options, graph_options=graph_options)
     return config
 
-  def _ExpectTestValue(self, engine_name, method, value):
+  def _ExpectTestValue(self, engine_name, method, expected_value):
+    label = "%s:%s" % (engine_name, method)
+    actual_value = trt_convert.get_test_value(label)
     self.assertEqual(
-        value, trt_convert.get_test_value("%s:%s" % (engine_name, method)))
+        expected_value,
+        actual_value,
+        msg="Unexpected test value with label %s. Actual: %s; expected: %s" %
+        (label, actual_value, expected_value))
 
   def _ExpectCalibration(self, engine_name, value):
     self._ExpectTestValue(engine_name, "ExecuteCalibration", value)
@@ -257,8 +246,9 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
     graph_name = (
         self.__class__.__name__ + "_" + run_params.test_name + "_" + label +
         ".pbtxt")
-    logging.info("Writing graph to %s/%s", self.get_temp_dir(), graph_name)
-    graph_io.write_graph(gdef, self.get_temp_dir(), graph_name)
+    temp_dir = os.getenv('TRT_TEST_TMPDIR', self.get_temp_dir())
+    logging.info("Writing graph to %s/%s", temp_dir, graph_name)
+    graph_io.write_graph(gdef, temp_dir, graph_name)
 
   def _VerifyConnections(self, params, converted_gdef):
     old_to_new_node_map = {
@@ -314,8 +304,8 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
     self.assertEqual(
         expected_input_map,
         actual_input_map,
-        msg="expected:\n%s\nvs actual:\n%s" % (expected_input_map,
-                                               actual_input_map))
+        msg="expected:\n%s\nvs actual:\n%s" % (sorted(
+            expected_input_map.items()), sorted(actual_input_map.items())))
 
   def _VerifyGraphDef(self, params, run_params, gdef, graph_state):
     self._WriteGraph(params, run_params, gdef, graph_state)
@@ -432,7 +422,7 @@ def _AddTests(test_class):
       logging.info(
          "Running test %s with parameters: use_optimizer=%s, "
          "precision_mode=%s, dynamic_engine=%s",
-          "testTfTRT_" + run_params.test_name, run_params.use_optimizer,
+          "testTfTrt_" + run_params.test_name, run_params.use_optimizer,
          run_params.precision_mode, run_params.dynamic_engine)
       self.RunTest(params, run_params)
 
@@ -461,7 +451,7 @@ def _AddTests(test_class):
           precision_mode=precision_mode,
           dynamic_engine=dynamic_engine,
           test_name=test_name)
-      setattr(test_class, "testTfTRT_" + test_name, _GetTest(run_params))
+      setattr(test_class, "testTfTrt_" + test_name, _GetTest(run_params))
 
 
 if trt_convert.is_tensorrt_enabled():
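The `_WriteGraph` change above makes the graph-dump directory overridable via the `TRT_TEST_TMPDIR` environment variable. A short usage sketch (the path is an example, not part of the change):

```python
# Dump the graphs a test writes to a stable location instead of the
# per-test temp dir, so the .pbtxt files survive the run for inspection.
import os

os.environ["TRT_TEST_TMPDIR"] = "/tmp/tftrt_graphs"

# Any integration test run in this process now writes the GraphDefs it
# verifies (via _WriteGraph above) under /tmp/tftrt_graphs.
```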