diff options
author | 2018-09-21 09:44:20 -0700 | |
---|---|---|
committer | 2018-09-21 09:44:20 -0700 | |
commit | 0e3ab3ab0d511f681b954322afc2ae89c8ea7d8f (patch) | |
tree | cfd0a10f35113c42001b00d3d855e69db30b4760 /tensorflow | |
parent | 5877baddc72e3f234f6e0a174447becd4cabc493 (diff) | |
parent | 72e085ca1701e275acec381885b519fa6b06522c (diff) |
Merge pull request #22371 from aaroey:fix_zero_size_allocation
PiperOrigin-RevId: 213998222
Diffstat (limited to 'tensorflow')
7 files changed, 41 insertions, 35 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index f29f4d6deb..7ad9bf22d3 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -678,7 +678,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos, // Function to construct a funcdef from the segment and add it to the graph. tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::Graph* graph, const tensorflow::GraphDef& segment, - const string& name) { + const string& engine_name) { tensorflow::Graph sgraph(graph->flib_def()); tensorflow::GraphConstructorOptions gcopts; TF_RETURN_IF_ERROR( @@ -761,9 +761,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::FunctionDefLibrary fdeflib; auto native_segment = fdeflib.add_function(); TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef( - sgraph, StrCat(name, "_native_segment"), native_segment)); + sgraph, StrCat(engine_name, "_native_segment"), native_segment)); if (VLOG_IS_ON(7)) { - VLOG(7) << name << " Function_Def "; + VLOG(7) << engine_name << " Function_Def "; VLOG(7) << native_segment->DebugString(); } VLOG(1) << "Adding funcdef to graphlib"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index c98b07ad8b..0ce891782e 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -693,8 +693,15 @@ class Converter { // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); if (i != 0) output_name = StrCat(output_name, ":", i); + // We need to check the name before setting it. For Identity op where the + // output is the input, if its input is one of the engine input, setting + // the name here will overwrite engine input bindings which will cause + // runtime error. if (output.is_tensor()) { - output.tensor()->setName(output_name.c_str()); + const char* tensor_name = output.tensor()->getName(); + if (tensor_name == nullptr || std::strlen(tensor_name) == 0) { + output.tensor()->setName(output_name.c_str()); + } } VLOG(2) << "Adding out tensor " << output_name << ": " << output.DebugString(); @@ -779,12 +786,11 @@ class Converter { // skip control nodes if (input_name[0] == '^') continue; string name = input_name; - auto first = name.find_first_of(':'); - // TODO(aaroey): why removing the colon but not the zero? A bug? + auto last = name.find_last_of(':'); // TODO(aaroey): use TensorId - if (first != string::npos && first + 2 == name.size() && - name[first + 1] == '0') { - name.erase(first); + if (last != string::npos && last + 2 == name.size() && + name[last + 1] == '0') { + name.erase(last); } if (trt_tensors_.count(name)) { @@ -2697,7 +2703,6 @@ tensorflow::Status ConvertGraphDefToEngine( TrtUniquePtrType<nvinfer1::IBuilder> builder( nvinfer1::createInferBuilder(*logger)); builder->setMaxBatchSize(max_batch_size); - // TODO(aaroey): use the allocator to allocate the TRT workspace. builder->setMaxWorkspaceSize(max_workspace_size_bytes); #if NV_TENSORRT_MAJOR > 3 builder->setGpuAllocator(allocator); diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc index d8f97bfbbc..a9425864dd 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc @@ -27,12 +27,16 @@ namespace tensorflow { namespace tensorrt { // std::align is not supported, so this method mimic its behavior. -void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) { - QCHECK_GT(alignment, 0) << "alignment must be greater than 0."; +// +// NOTE(aaroey): according to the TensorRT API, +// nvinfer1::IGpuAllocator::allocate() uses uint64_t type for size and alignment +// parameters, so here we use the same type to make it compatible. +void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space) { + QCHECK_GT(alignment, 0ul) << "alignment must be greater than 0."; QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2."; - QCHECK_GT(size, 0) << "size must be greater than 0."; + QCHECK_GT(size, 0ul) << "size must be greater than 0."; QCHECK(ptr) << "ptr must not be nullptr."; - QCHECK_GT(space, 0) << "space must be greater than 0."; + QCHECK_GT(space, 0ul) << "space must be greater than 0."; const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr); QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows."; @@ -67,12 +71,16 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); } void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment, uint32_t flags) { + if (size == 0) return nullptr; // WAR for allocator alignment requirement. Certain cuda API calls require GPU // memory with alignemtn to cudaDeviceProp::textureAlignment. // See issue #20856 alignment = 512; assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. - size_t total_size = size + alignment; + uint64_t total_size = size + alignment; + // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce + // unexpected result when TRT tries to allocate more bytes than size_t can + // carry. Fix this. void* mem = allocator_->AllocateRaw(alignment, total_size); if (!mem) return nullptr; diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h index 6f94492083..dc9862b16c 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -29,7 +29,7 @@ limitations under the License. namespace tensorflow { namespace tensorrt { // std::align is not supported, so this function mimic its behavior. -void* Align(size_t alignment, size_t size, void*& ptr, size_t& space); +void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space); } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc index f515ed03f2..ad6b1d7d4c 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc @@ -20,11 +20,11 @@ limitations under the License. namespace tensorflow { namespace tensorrt { -bool RunTest(const size_t alignment, const size_t size, - const intptr_t orig_ptr_val, const size_t orig_space) { +bool RunTest(const uint64_t alignment, const uint64_t size, + const intptr_t orig_ptr_val, const uint64_t orig_space) { void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val); void* ptr = orig_ptr; - size_t space = orig_space; + uint64_t space = orig_space; void* result = Align(alignment, size, ptr, space); if (result == nullptr) { EXPECT_EQ(orig_ptr, ptr); @@ -43,24 +43,25 @@ bool RunTest(const size_t alignment, const size_t size, } TEST(TRTAllocatorTest, Align) { - for (const size_t space : - {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) { - for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) { - for (const intptr_t ptr_val : + for (const uint64_t space : + {1ul, 2ul, 3ul, 4ul, 7ul, 8ul, 9ul, 10ul, 16ul, 32ul, 511ul, 512ul, + 513ul, 700ul, 12345ul, 1ul << 32}) { + for (uint64_t alignment = 1; alignment <= space * 4; alignment *= 2) { + for (const uintptr_t ptr_val : {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1, alignment + (alignment / 2)}) { if (ptr_val % alignment == 0) { - for (const size_t size : + for (const uint64_t size : {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) { EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space)); } } else { EXPECT_FALSE(RunTest(alignment, space, ptr_val, space)); - const size_t diff = alignment - ptr_val % alignment; + const uint64_t diff = alignment - ptr_val % alignment; if (space > diff) { EXPECT_TRUE( RunTest(alignment, space - diff, ptr_val + diff, space - diff)); - for (const size_t size : + for (const uint64_t size : {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff, space - diff + 1, space - 1}) { EXPECT_EQ(space - diff >= size, diff --git a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py index 62f4e525f7..d2f65344da 100644 --- a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py +++ b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py @@ -144,14 +144,6 @@ class BiasaddMatMulTest(trt_test.TfTrtIntegrationTestBase): # mode, which is a bug. Re-enable this when trt library is fixed. return not trt_test.IsQuantizationMode(run_params.precision_mode) - def ExpectedAbsoluteTolerance(self, run_params): - """The absolute tolerance to compare floating point results.""" - return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03 - - def ExpectedRelativeTolerance(self, run_params): - """The relative tolerance to compare floating point results.""" - return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03 - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py index 699f79adec..4f935a7665 100644 --- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py @@ -134,7 +134,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): dims[0] for dims in self._GetParamsCached().input_dims if len(dims) ]), max_workspace_size_bytes=1 << 25, - precision_mode=self._ToBytes(run_params.precision_mode), + precision_mode=run_params.precision_mode, minimum_segment_size=2, is_dynamic_op=run_params.dynamic_engine, maximum_cached_engines=1, |