diff options
author | Guangda Lai <31743510+aaroey@users.noreply.github.com> | 2018-09-18 22:32:56 -0700 |
---|---|---|
committer | Guangda Lai <31743510+aaroey@users.noreply.github.com> | 2018-09-18 22:32:56 -0700 |
commit | 8d5a36ef0f7e65a84d64c800ca5527a3cc6ff2f0 (patch) | |
tree | 7d8582b9a46244a6eabe71988ea5e3b9c2f9af27 /tensorflow/contrib/tensorrt | |
parent | 65231a4c48ce3a1297d00e2a6310be05e79ed88c (diff) |
Fix trt allocator and tensor name issues in convert_nodes.cc.
Diffstat (limited to 'tensorflow/contrib/tensorrt')
5 files changed, 28 insertions, 27 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b019c99882..fe6f8b42bd 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -678,7 +678,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos, // Function to construct a funcdef from the segment and add it to the graph. tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::Graph* graph, const tensorflow::GraphDef& segment, - const string& name) { + const string& engine_name) { tensorflow::Graph sgraph(graph->flib_def()); tensorflow::GraphConstructorOptions gcopts; TF_RETURN_IF_ERROR( @@ -761,9 +761,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::FunctionDefLibrary fdeflib; auto native_segment = fdeflib.add_function(); TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef( - sgraph, StrCat(name, "_native_segment"), native_segment)); + sgraph, StrCat(engine_name, "_native_segment"), native_segment)); if (VLOG_IS_ON(7)) { - VLOG(7) << name << " Function_Def "; + VLOG(7) << engine_name << " Function_Def "; VLOG(7) << native_segment->DebugString(); } VLOG(1) << "Adding funcdef to graphlib"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index c98b07ad8b..21fb459483 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -693,9 +693,10 @@ class Converter { // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); if (i != 0) output_name = StrCat(output_name, ":", i); - if (output.is_tensor()) { - output.tensor()->setName(output_name.c_str()); - } + // We should not call output.tensor()->setName(), since the name may have + // already been set before (e.g. 
for Identity op where the output is the + // input, if its input is one of the engine input, setting the name here + // will overwrite engine input bindings which will cause runtime error). VLOG(2) << "Adding out tensor " << output_name << ": " << output.DebugString(); if (!trt_tensors_.insert({output_name, output}).second) { @@ -779,8 +780,7 @@ class Converter { // skip control nodes if (input_name[0] == '^') continue; string name = input_name; - auto first = name.find_first_of(':'); - // TODO(aaroey): why removing the colon but not the zero? A bug? + auto first = name.find_last_of(':'); // TODO(aaroey): use TensorId if (first != string::npos && first + 2 == name.size() && name[first + 1] == '0') { @@ -1301,7 +1301,6 @@ tensorflow::Status ConvertConv2DHelper( layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); - layer->setName(node_def.name().c_str()); layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); VLOG(2) << "TENSOR out: " << DebugString(output_tensor->getDimensions()); @@ -1547,7 +1546,6 @@ tensorflow::Status ConvertPool(Converter& ctx, layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); - layer->setName(node_def.name().c_str()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); if (data_format == "NHWC") { @@ -2697,7 +2695,6 @@ tensorflow::Status ConvertGraphDefToEngine( TrtUniquePtrType<nvinfer1::IBuilder> builder( nvinfer1::createInferBuilder(*logger)); builder->setMaxBatchSize(max_batch_size); - // TODO(aaroey): use the allocator to allocate the TRT workspace. 
builder->setMaxWorkspaceSize(max_workspace_size_bytes); #if NV_TENSORRT_MAJOR > 3 builder->setGpuAllocator(allocator); diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc index d8f97bfbbc..f6cf72e07f 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc @@ -27,7 +27,7 @@ namespace tensorflow { namespace tensorrt { // std::align is not supported, so this method mimics its behavior. -void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) { +void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space) { QCHECK_GT(alignment, 0) << "alignment must be greater than 0."; QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2."; QCHECK_GT(size, 0) << "size must be greater than 0."; @@ -67,12 +67,16 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); } void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment, uint32_t flags) { + if (size == 0) return nullptr; // WAR for allocator alignment requirement. Certain cuda API calls require GPU // memory with alignment to cudaDeviceProp::textureAlignment. // See issue #20856 alignment = 512; assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. - size_t total_size = size + alignment; + int64_t total_size = size + alignment; + // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce + // unexpected result when TRT tries to allocate more bytes than size_t can + // carry. Fix this. void* mem = allocator_->AllocateRaw(alignment, total_size); if (!mem) return nullptr; diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h index 6f94492083..0be3c4fd07 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -29,7 +29,7 @@ limitations under the License. 
namespace tensorflow { namespace tensorrt { // std::align is not supported, so this function mimics its behavior. -void* Align(size_t alignment, size_t size, void*& ptr, size_t& space); +void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space); } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc index f515ed03f2..06bfb3269c 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc @@ -20,11 +20,11 @@ limitations under the License. namespace tensorflow { namespace tensorrt { -bool RunTest(const size_t alignment, const size_t size, - const intptr_t orig_ptr_val, const size_t orig_space) { +bool RunTest(const int64_t alignment, const int64_t size, + const intptr_t orig_ptr_val, const int64_t orig_space) { void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val); void* ptr = orig_ptr; - size_t space = orig_space; + int64_t space = orig_space; void* result = Align(alignment, size, ptr, space); if (result == nullptr) { EXPECT_EQ(orig_ptr, ptr); @@ -43,25 +43,25 @@ bool RunTest(const size_t alignment, const size_t size, } TEST(TRTAllocatorTest, Align) { - for (const size_t space : - {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) { - for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) { - for (const intptr_t ptr_val : - {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1, + for (const int64_t space : {1l, 2l, 3l, 4l, 7l, 8l, 9l, 10l, 16l, 32l, 511l, + 512l, 513l, 700l, 12345l, 1l << 32}) { + for (int64_t alignment = 1; alignment <= space * 4; alignment *= 2) { + for (const uintptr_t ptr_val : + {1l, alignment == 1 ? 1l : alignment - 1, alignment, alignment + 1, alignment + (alignment / 2)}) { if (ptr_val % alignment == 0) { - for (const size_t size : - {1ul, space == 1 ? 
1ul : space - 1, space, space + 1}) { + for (const int64_t size : + {1l, space == 1 ? 1l : space - 1, space, space + 1}) { EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space)); } } else { EXPECT_FALSE(RunTest(alignment, space, ptr_val, space)); - const size_t diff = alignment - ptr_val % alignment; + const int64_t diff = alignment - ptr_val % alignment; if (space > diff) { EXPECT_TRUE( RunTest(alignment, space - diff, ptr_val + diff, space - diff)); - for (const size_t size : - {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff, + for (const int64_t size : + {1l, space - diff > 1 ? space - diff - 1 : 1l, space - diff, space - diff + 1, space - 1}) { EXPECT_EQ(space - diff >= size, RunTest(alignment, size, ptr_val, space)); |