about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/contrib/tensorrt
diff options
context:
space:
mode:
author: Guangda Lai <31743510+aaroey@users.noreply.github.com> 2018-09-18 22:32:56 -0700
committer: Guangda Lai <31743510+aaroey@users.noreply.github.com> 2018-09-18 22:32:56 -0700
commit 8d5a36ef0f7e65a84d64c800ca5527a3cc6ff2f0 (patch)
tree 7d8582b9a46244a6eabe71988ea5e3b9c2f9af27 /tensorflow/contrib/tensorrt
parent 65231a4c48ce3a1297d00e2a6310be05e79ed88c (diff)
Fix trt allocator and tensor name issues in convert_nodes.cc.
Diffstat (limited to 'tensorflow/contrib/tensorrt')
-rw-r--r-- tensorflow/contrib/tensorrt/convert/convert_graph.cc | 6
-rw-r--r-- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 13
-rw-r--r-- tensorflow/contrib/tensorrt/resources/trt_allocator.cc | 8
-rw-r--r-- tensorflow/contrib/tensorrt/resources/trt_allocator.h | 2
-rw-r--r-- tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc | 26
5 files changed, 28 insertions, 27 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index b019c99882..fe6f8b42bd 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -678,7 +678,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos,
// Function to construct a funcdef from the segment and add it to the graph.
tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
tensorflow::Graph* graph, const tensorflow::GraphDef& segment,
- const string& name) {
+ const string& engine_name) {
tensorflow::Graph sgraph(graph->flib_def());
tensorflow::GraphConstructorOptions gcopts;
TF_RETURN_IF_ERROR(
@@ -761,9 +761,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
tensorflow::FunctionDefLibrary fdeflib;
auto native_segment = fdeflib.add_function();
TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef(
- sgraph, StrCat(name, "_native_segment"), native_segment));
+ sgraph, StrCat(engine_name, "_native_segment"), native_segment));
if (VLOG_IS_ON(7)) {
- VLOG(7) << name << " Function_Def ";
+ VLOG(7) << engine_name << " Function_Def ";
VLOG(7) << native_segment->DebugString();
}
VLOG(1) << "Adding funcdef to graphlib";
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index c98b07ad8b..21fb459483 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -693,9 +693,10 @@ class Converter {
// TODO(jie): tf protobuf seems to be omitting the :0 suffix
string output_name = node_def.name();
if (i != 0) output_name = StrCat(output_name, ":", i);
- if (output.is_tensor()) {
- output.tensor()->setName(output_name.c_str());
- }
+ // We should not call output.tensor()->setName(), since the name may have
+ // already been set before (e.g. for Identity op where the output is the
+ // input, if its input is one of the engine input, setting the name here
+ // will overwrite engine input bindings which will cause runtime error).
VLOG(2) << "Adding out tensor " << output_name << ": "
<< output.DebugString();
if (!trt_tensors_.insert({output_name, output}).second) {
@@ -779,8 +780,7 @@ class Converter {
// skip control nodes
if (input_name[0] == '^') continue;
string name = input_name;
- auto first = name.find_first_of(':');
- // TODO(aaroey): why removing the colon but not the zero? A bug?
+ auto first = name.find_last_of(':');
// TODO(aaroey): use TensorId
if (first != string::npos && first + 2 == name.size() &&
name[first + 1] == '0') {
@@ -1301,7 +1301,6 @@ tensorflow::Status ConvertConv2DHelper(
layer->setStride(stride);
layer->setPadding({padding[0].first, padding[1].first});
- layer->setName(node_def.name().c_str());
layer->setNbGroups(num_groups);
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
VLOG(2) << "TENSOR out: " << DebugString(output_tensor->getDimensions());
@@ -1547,7 +1546,6 @@ tensorflow::Status ConvertPool(Converter& ctx,
layer->setStride(stride);
layer->setPadding({padding[0].first, padding[1].first});
- layer->setName(node_def.name().c_str());
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
if (data_format == "NHWC") {
@@ -2697,7 +2695,6 @@ tensorflow::Status ConvertGraphDefToEngine(
TrtUniquePtrType<nvinfer1::IBuilder> builder(
nvinfer1::createInferBuilder(*logger));
builder->setMaxBatchSize(max_batch_size);
- // TODO(aaroey): use the allocator to allocate the TRT workspace.
builder->setMaxWorkspaceSize(max_workspace_size_bytes);
#if NV_TENSORRT_MAJOR > 3
builder->setGpuAllocator(allocator);
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index d8f97bfbbc..f6cf72e07f 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -27,7 +27,7 @@ namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this method mimic its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
+void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space) {
QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
QCHECK_GT(size, 0) << "size must be greater than 0.";
@@ -67,12 +67,16 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); }
void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
uint32_t flags) {
+ if (size == 0) return nullptr;
// WAR for allocator alignment requirement. Certain cuda API calls require GPU
// memory with alignemtn to cudaDeviceProp::textureAlignment.
// See issue #20856
alignment = 512;
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
- size_t total_size = size + alignment;
+ int64_t total_size = size + alignment;
+ // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce
+ // unexpected result when TRT tries to allocate more bytes than size_t can
+ // carry. Fix this.
void* mem = allocator_->AllocateRaw(alignment, total_size);
if (!mem) return nullptr;
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
index 6f94492083..0be3c4fd07 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
@@ -29,7 +29,7 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this function mimic its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
+void* Align(int64_t alignment, int64_t size, void*& ptr, int64_t& space);
} // namespace tensorrt
} // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index f515ed03f2..06bfb3269c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -20,11 +20,11 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
-bool RunTest(const size_t alignment, const size_t size,
- const intptr_t orig_ptr_val, const size_t orig_space) {
+bool RunTest(const int64_t alignment, const int64_t size,
+ const intptr_t orig_ptr_val, const int64_t orig_space) {
void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
void* ptr = orig_ptr;
- size_t space = orig_space;
+ int64_t space = orig_space;
void* result = Align(alignment, size, ptr, space);
if (result == nullptr) {
EXPECT_EQ(orig_ptr, ptr);
@@ -43,25 +43,25 @@ bool RunTest(const size_t alignment, const size_t size,
}
TEST(TRTAllocatorTest, Align) {
- for (const size_t space :
- {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
- for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
- for (const intptr_t ptr_val :
- {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
+ for (const int64_t space : {1l, 2l, 3l, 4l, 7l, 8l, 9l, 10l, 16l, 32l, 511l,
+ 512l, 513l, 700l, 12345l, 1l << 32}) {
+ for (int64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
+ for (const uintptr_t ptr_val :
+ {1l, alignment == 1 ? 1l : alignment - 1, alignment, alignment + 1,
alignment + (alignment / 2)}) {
if (ptr_val % alignment == 0) {
- for (const size_t size :
- {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
+ for (const int64_t size :
+ {1l, space == 1 ? 1l : space - 1, space, space + 1}) {
EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
}
} else {
EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
- const size_t diff = alignment - ptr_val % alignment;
+ const int64_t diff = alignment - ptr_val % alignment;
if (space > diff) {
EXPECT_TRUE(
RunTest(alignment, space - diff, ptr_val + diff, space - diff));
- for (const size_t size :
- {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
+ for (const int64_t size :
+ {1l, space - diff > 1 ? space - diff - 1 : 1l, space - diff,
space - diff + 1, space - 1}) {
EXPECT_EQ(space - diff >= size,
RunTest(alignment, size, ptr_val, space));