diff options
author | Guangda Lai <31743510+aaroey@users.noreply.github.com> | 2018-09-18 22:32:56 -0700 |
---|---|---|
committer | Guangda Lai <31743510+aaroey@users.noreply.github.com> | 2018-09-18 22:32:56 -0700 |
commit | 8d5a36ef0f7e65a84d64c800ca5527a3cc6ff2f0 (patch) | |
tree | 7d8582b9a46244a6eabe71988ea5e3b9c2f9af27 /tensorflow/contrib/tensorrt | |
parent | 65231a4c48ce3a1297d00e2a6310be05e79ed88c (diff) |
Fix trt allocator and tensor name issues in convert_nodes.cc.
Diffstat (limited to 'tensorflow/contrib/tensorrt')
5 files changed, 28 insertions, 27 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b019c99882..fe6f8b42bd 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -678,7 +678,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos, // Function to construct a funcdef from the segment and add it to the graph. tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::Graph* graph, const tensorflow::GraphDef& segment, - const string& name) { + const string& engine_name) { tensorflow::Graph sgraph(graph->flib_def()); tensorflow::GraphConstructorOptions gcopts; TF_RETURN_IF_ERROR( @@ -761,9 +761,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::FunctionDefLibrary fdeflib; auto native_segment = fdeflib.add_function(); TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef( - sgraph, StrCat(name, "_native_segment"), native_segment)); + sgraph, StrCat(engine_name, "_native_segment"), native_segment)); if (VLOG_IS_ON(7)) { - VLOG(7) << name << " Function_Def "; + VLOG(7) << engine_name << " Function_Def "; VLOG(7) << native_segment->DebugString(); } VLOG(1) << "Adding funcdef to graphlib"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index c98b07ad8b..21fb459483 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -693,9 +693,10 @@ class Converter { // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); if (i != 0) output_name = StrCat(output_name, ":", i); - if (output.is_tensor()) { - output.tensor()->setName(output_name.c_str()); - } + // We should not call output.tensor()->setName(), since the name may have + // already been set before (e.g. 
for Identity op where the output is the + // input, if its input is one of the engine input, setting the name here + // will overwrite engine input bindings which will cause runtime error). VLOG(2) << "Adding out tensor " << output_name << ": " << output.DebugString(); if (!trt_tensors_.insert({output_name, output}).second) { @@ -779,8 +780,7 @@ class Converter { // skip control nodes if (input_name[0] == '^') continue; string name = input_name; - auto first = name.find_first_of(':'); - // TODO(aaroey): why removing the colon but not the zero? A bug? + auto first = name.find_last_of(':'); // TODO(aaroey): use TensorId if (first != string::npos && first + 2 == name.size() && name[first + 1] == '0') { @@ -1301,7 +1301,6 @@ tensorflow::Status ConvertConv2DHelper( layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); - layer->setName(node_def.name().c_str()); layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); VLOG(2) << "TENSOR out: " << DebugString(output_tensor->getDimensions()); @@ -1547,7 +1546,6 @@ tensorflow::Status ConvertPool(Converter& ctx, layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); - layer->setName(node_def.name().c_str()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); if (data_format == "NHWC") { @@ -2697,7 +2695,6 @@ tensorflow::Status ConvertGraphDefToEngine( TrtUniquePtrType<nvinfer1::IBuilder> builder( nvinfer1::createInferBuilder(*logger)); builder->setMaxBatchSize(max_batch_size); - // TODO(aaroey): use the allocator to allocate the TRT workspace. 
builder->setMaxWorkspaceSize(max_workspace_size_bytes); #if NV_TENSORRT_MAJOR > 3 builder->setGpuAllocator(allocator); diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc index d8f97bfbbc..f6cf72e07f 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc @@ -27,7 +27,7 @@ namespace tensorflow { namespace tensorrt { // std::align is not supported, so this method mimics its behavior. -void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) { +void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space) { QCHECK_GT(alignment, 0) << "alignment must be greater than 0."; QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2."; QCHECK_GT(size, 0) << "size must be greater than 0."; @@ -67,12 +67,16 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); } void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment, uint32_t flags) { + if (size == 0) return nullptr; // WAR for allocator alignment requirement. Certain cuda API calls require GPU // memory with alignment to cudaDeviceProp::textureAlignment. // See issue #20856 alignment = 512; assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. - size_t total_size = size + alignment; + int64_t total_size = size + alignment; + // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce + // unexpected result when TRT tries to allocate more bytes than size_t can + // carry. Fix this. void* mem = allocator_->AllocateRaw(alignment, total_size); if (!mem) return nullptr; diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h index 6f94492083..0be3c4fd07 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -29,7 +29,7 @@ limitations under the License. 
namespace tensorflow { namespace tensorrt { // std::align is not supported, so this function mimics its behavior. -void* Align(size_t alignment, size_t size, void*& ptr, size_t& space); +void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space); } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc index f515ed03f2..06bfb3269c 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc @@ -20,11 +20,11 @@ limitations under the License. namespace tensorflow { namespace tensorrt { -bool RunTest(const size_t alignment, const size_t size, - const intptr_t orig_ptr_val, const size_t orig_space) { +bool RunTest(const int64_t alignment, const int64_t size, + const intptr_t orig_ptr_val, const int64_t orig_space) { void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val); void* ptr = orig_ptr; - size_t space = orig_space; + int64_t space = orig_space; void* result = Align(alignment, size, ptr, space); if (result == nullptr) { EXPECT_EQ(orig_ptr, ptr); @@ -43,25 +43,25 @@ bool RunTest(const size_t alignment, const size_t size, } TEST(TRTAllocatorTest, Align) { - for (const size_t space : - {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) { - for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) { - for (const intptr_t ptr_val : - {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1, + for (const int64_t space : {1l, 2l, 3l, 4l, 7l, 8l, 9l, 10l, 16l, 32l, 511l, + 512l, 513l, 700l, 12345l, 1l << 32}) { + for (int64_t alignment = 1; alignment <= space * 4; alignment *= 2) { + for (const uintptr_t ptr_val : + {1l, alignment == 1 ? 1l : alignment - 1, alignment, alignment + 1, alignment + (alignment / 2)}) { if (ptr_val % alignment == 0) { - for (const size_t size : - {1ul, space == 1 ? 
1ul : space - 1, space, space + 1}) { + for (const int64_t size : + {1l, space == 1 ? 1l : space - 1, space, space + 1}) { EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space)); } } else { EXPECT_FALSE(RunTest(alignment, space, ptr_val, space)); - const size_t diff = alignment - ptr_val % alignment; + const int64_t diff = alignment - ptr_val % alignment; if (space > diff) { EXPECT_TRUE( RunTest(alignment, space - diff, ptr_val + diff, space - diff)); - for (const size_t size : - {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff, + for (const int64_t size : + {1l, space - diff > 1 ? space - diff - 1 : 1l, space - diff, space - diff + 1, space - 1}) { EXPECT_EQ(space - diff >= size, RunTest(alignment, size, ptr_val, space)); |