Merge pull request #22371 from aaroey:fix_zero_size_allocation

PiperOrigin-RevId: 213998222
author: TensorFlower Gardener <gardener@tensorflow.org> 2018-09-21 09:44:20 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-09-21 09:44:20 -0700
commit: 0e3ab3ab0d511f681b954322afc2ae89c8ea7d8f (patch)
tree: cfd0a10f35113c42001b00d3d855e69db30b4760 /tensorflow
parent: 5877baddc72e3f234f6e0a174447becd4cabc493 (diff)
parent: 72e085ca1701e275acec381885b519fa6b06522c (diff)
7 files changed, 41 insertions, 35 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index f29f4d6deb..7ad9bf22d3 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -678,7 +678,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos,
 // Function to construct a funcdef from the segment and add it to the graph.
 tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
     tensorflow::Graph* graph, const tensorflow::GraphDef& segment,
-    const string& name) {
+    const string& engine_name) {
   tensorflow::Graph sgraph(graph->flib_def());
   tensorflow::GraphConstructorOptions gcopts;
   TF_RETURN_IF_ERROR(
@@ -761,9 +761,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
   tensorflow::FunctionDefLibrary fdeflib;
   auto native_segment = fdeflib.add_function();
   TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef(
-      sgraph, StrCat(name, "_native_segment"), native_segment));
+      sgraph, StrCat(engine_name, "_native_segment"), native_segment));
   if (VLOG_IS_ON(7)) {
-    VLOG(7) << name << " Function_Def ";
+    VLOG(7) << engine_name << " Function_Def ";
     VLOG(7) << native_segment->DebugString();
   }
   VLOG(1) << "Adding funcdef to graphlib";
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index c98b07ad8b..0ce891782e 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -693,8 +693,15 @@ class Converter {
       // TODO(jie): tf protobuf seems to be omitting the :0 suffix
       string output_name = node_def.name();
       if (i != 0) output_name = StrCat(output_name, ":", i);
+      // We need to check the name before setting it. For Identity op where the
+      // output is the input, if its input is one of the engine input, setting
+      // the name here will overwrite engine input bindings which will cause
+      // runtime error.
       if (output.is_tensor()) {
-        output.tensor()->setName(output_name.c_str());
+        const char* tensor_name = output.tensor()->getName();
+        if (tensor_name == nullptr || std::strlen(tensor_name) == 0) {
+          output.tensor()->setName(output_name.c_str());
+        }
       }
       VLOG(2) << "Adding out tensor " << output_name << ": "
               << output.DebugString();
@@ -779,12 +786,11 @@ class Converter {
       // skip control nodes
       if (input_name[0] == '^') continue;
       string name = input_name;
-      auto first = name.find_first_of(':');
-      // TODO(aaroey): why removing the colon but not the zero? A bug?
+      auto last = name.find_last_of(':');
       // TODO(aaroey): use TensorId
-      if (first != string::npos && first + 2 == name.size() &&
-          name[first + 1] == '0') {
-        name.erase(first);
+      if (last != string::npos && last + 2 == name.size() &&
+          name[last + 1] == '0') {
+        name.erase(last);
       }
 
       if (trt_tensors_.count(name)) {
@@ -2697,7 +2703,6 @@ tensorflow::Status ConvertGraphDefToEngine(
   TrtUniquePtrType<nvinfer1::IBuilder> builder(
       nvinfer1::createInferBuilder(*logger));
   builder->setMaxBatchSize(max_batch_size);
-  // TODO(aaroey): use the allocator to allocate the TRT workspace.
   builder->setMaxWorkspaceSize(max_workspace_size_bytes);
 #if NV_TENSORRT_MAJOR > 3
   builder->setGpuAllocator(allocator);
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index d8f97bfbbc..a9425864dd 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -27,12 +27,16 @@ namespace tensorflow {
 namespace tensorrt {
 
 // std::align is not supported, so this method mimic its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
-  QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
+//
+// NOTE(aaroey): according to the TensorRT API,
+// nvinfer1::IGpuAllocator::allocate() uses uint64_t type for size and alignment
+// parameters, so here we use the same type to make it compatible.
+void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space) {
+  QCHECK_GT(alignment, 0ul) << "alignment must be greater than 0.";
   QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
-  QCHECK_GT(size, 0) << "size must be greater than 0.";
+  QCHECK_GT(size, 0ul) << "size must be greater than 0.";
   QCHECK(ptr) << "ptr must not be nullptr.";
-  QCHECK_GT(space, 0) << "space must be greater than 0.";
+  QCHECK_GT(space, 0ul) << "space must be greater than 0.";
   const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
   QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";
 
@@ -67,12 +71,16 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); }
 
 void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
                                    uint32_t flags) {
+  if (size == 0) return nullptr;
   // WAR for allocator alignment requirement. Certain cuda API calls require GPU
   // memory with alignemtn to cudaDeviceProp::textureAlignment.
   // See issue #20856
   alignment = 512;
   assert((alignment & (alignment - 1)) == 0);  // zero or a power of 2.
-  size_t total_size = size + alignment;
+  uint64_t total_size = size + alignment;
+  // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce
+  // unexpected result when TRT tries to allocate more bytes than size_t can
+  // carry. Fix this.
   void* mem = allocator_->AllocateRaw(alignment, total_size);
   if (!mem) return nullptr;
 
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
index 6f94492083..dc9862b16c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 // std::align is not supported, so this function mimic its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
+void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space);
 }  // namespace tensorrt
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index f515ed03f2..ad6b1d7d4c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -20,11 +20,11 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 
-bool RunTest(const size_t alignment, const size_t size,
-             const intptr_t orig_ptr_val, const size_t orig_space) {
+bool RunTest(const uint64_t alignment, const uint64_t size,
+             const intptr_t orig_ptr_val, const uint64_t orig_space) {
   void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
   void* ptr = orig_ptr;
-  size_t space = orig_space;
+  uint64_t space = orig_space;
   void* result = Align(alignment, size, ptr, space);
   if (result == nullptr) {
     EXPECT_EQ(orig_ptr, ptr);
@@ -43,24 +43,25 @@ bool RunTest(const size_t alignment, const size_t size,
 }
 
 TEST(TRTAllocatorTest, Align) {
-  for (const size_t space :
-       {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
-    for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
-      for (const intptr_t ptr_val :
+  for (const uint64_t space :
+       {1ul, 2ul, 3ul, 4ul, 7ul, 8ul, 9ul, 10ul, 16ul, 32ul, 511ul, 512ul,
+        513ul, 700ul, 12345ul, 1ul << 32}) {
+    for (uint64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
+      for (const uintptr_t ptr_val :
            {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
             alignment + (alignment / 2)}) {
         if (ptr_val % alignment == 0) {
-          for (const size_t size :
+          for (const uint64_t size :
                {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
             EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
           }
         } else {
           EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
-          const size_t diff = alignment - ptr_val % alignment;
+          const uint64_t diff = alignment - ptr_val % alignment;
           if (space > diff) {
             EXPECT_TRUE(
                 RunTest(alignment, space - diff, ptr_val + diff, space - diff));
-            for (const size_t size :
+            for (const uint64_t size :
                  {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
                   space - diff + 1, space - 1}) {
               EXPECT_EQ(space - diff >= size,
diff --git a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
index 62f4e525f7..d2f65344da 100644
--- a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
+++ b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
@@ -144,14 +144,6 @@ class BiasaddMatMulTest(trt_test.TfTrtIntegrationTestBase):
     # mode, which is a bug. Re-enable this when trt library is fixed.
     return not trt_test.IsQuantizationMode(run_params.precision_mode)
 
-  def ExpectedAbsoluteTolerance(self, run_params):
-    """The absolute tolerance to compare floating point results."""
-    return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03
-
-  def ExpectedRelativeTolerance(self, run_params):
-    """The relative tolerance to compare floating point results."""
-    return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
index 699f79adec..4f935a7665 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
@@ -134,7 +134,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
             dims[0] for dims in self._GetParamsCached().input_dims if len(dims)
         ]),
         max_workspace_size_bytes=1 << 25,
-        precision_mode=self._ToBytes(run_params.precision_mode),
+        precision_mode=run_params.precision_mode,
         minimum_segment_size=2,
         is_dynamic_op=run_params.dynamic_engine,
         maximum_cached_engines=1,
author	TensorFlower Gardener <gardener@tensorflow.org>	2018-09-21 09:44:20 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-09-21 09:44:20 -0700
commit	0e3ab3ab0d511f681b954322afc2ae89c8ea7d8f (patch)
tree	cfd0a10f35113c42001b00d3d855e69db30b4760 /tensorflow
parent	5877baddc72e3f234f6e0a174447becd4cabc493 (diff)
parent	72e085ca1701e275acec381885b519fa6b06522c (diff)