aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-09-21 09:44:20 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-09-21 09:44:20 -0700
commit0e3ab3ab0d511f681b954322afc2ae89c8ea7d8f (patch)
treecfd0a10f35113c42001b00d3d855e69db30b4760 /tensorflow
parent5877baddc72e3f234f6e0a174447becd4cabc493 (diff)
parent72e085ca1701e275acec381885b519fa6b06522c (diff)
Merge pull request #22371 from aaroey:fix_zero_size_allocation
PiperOrigin-RevId: 213998222
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_graph.cc6
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_nodes.cc19
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_allocator.cc18
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_allocator.h2
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc21
-rw-r--r--tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py8
-rw-r--r--tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py2
7 files changed, 41 insertions, 35 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index f29f4d6deb..7ad9bf22d3 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -678,7 +678,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos,
// Function to construct a funcdef from the segment and add it to the graph.
tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
tensorflow::Graph* graph, const tensorflow::GraphDef& segment,
- const string& name) {
+ const string& engine_name) {
tensorflow::Graph sgraph(graph->flib_def());
tensorflow::GraphConstructorOptions gcopts;
TF_RETURN_IF_ERROR(
@@ -761,9 +761,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary(
tensorflow::FunctionDefLibrary fdeflib;
auto native_segment = fdeflib.add_function();
TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef(
- sgraph, StrCat(name, "_native_segment"), native_segment));
+ sgraph, StrCat(engine_name, "_native_segment"), native_segment));
if (VLOG_IS_ON(7)) {
- VLOG(7) << name << " Function_Def ";
+ VLOG(7) << engine_name << " Function_Def ";
VLOG(7) << native_segment->DebugString();
}
VLOG(1) << "Adding funcdef to graphlib";
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index c98b07ad8b..0ce891782e 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -693,8 +693,15 @@ class Converter {
// TODO(jie): tf protobuf seems to be omitting the :0 suffix
string output_name = node_def.name();
if (i != 0) output_name = StrCat(output_name, ":", i);
+ // We need to check the name before setting it. For Identity op where the
+ // output is the input, if its input is one of the engine input, setting
+ // the name here will overwrite engine input bindings which will cause
+ // runtime error.
if (output.is_tensor()) {
- output.tensor()->setName(output_name.c_str());
+ const char* tensor_name = output.tensor()->getName();
+ if (tensor_name == nullptr || std::strlen(tensor_name) == 0) {
+ output.tensor()->setName(output_name.c_str());
+ }
}
VLOG(2) << "Adding out tensor " << output_name << ": "
<< output.DebugString();
@@ -779,12 +786,11 @@ class Converter {
// skip control nodes
if (input_name[0] == '^') continue;
string name = input_name;
- auto first = name.find_first_of(':');
- // TODO(aaroey): why removing the colon but not the zero? A bug?
+ auto last = name.find_last_of(':');
// TODO(aaroey): use TensorId
- if (first != string::npos && first + 2 == name.size() &&
- name[first + 1] == '0') {
- name.erase(first);
+ if (last != string::npos && last + 2 == name.size() &&
+ name[last + 1] == '0') {
+ name.erase(last);
}
if (trt_tensors_.count(name)) {
@@ -2697,7 +2703,6 @@ tensorflow::Status ConvertGraphDefToEngine(
TrtUniquePtrType<nvinfer1::IBuilder> builder(
nvinfer1::createInferBuilder(*logger));
builder->setMaxBatchSize(max_batch_size);
- // TODO(aaroey): use the allocator to allocate the TRT workspace.
builder->setMaxWorkspaceSize(max_workspace_size_bytes);
#if NV_TENSORRT_MAJOR > 3
builder->setGpuAllocator(allocator);
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index d8f97bfbbc..a9425864dd 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -27,12 +27,16 @@ namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this method mimics its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
- QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
+//
+// NOTE(aaroey): according to the TensorRT API,
+// nvinfer1::IGpuAllocator::allocate() uses uint64_t type for size and alignment
+// parameters, so here we use the same type to make it compatible.
+void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space) {
+ QCHECK_GT(alignment, 0ul) << "alignment must be greater than 0.";
QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
- QCHECK_GT(size, 0) << "size must be greater than 0.";
+ QCHECK_GT(size, 0ul) << "size must be greater than 0.";
QCHECK(ptr) << "ptr must not be nullptr.";
- QCHECK_GT(space, 0) << "space must be greater than 0.";
+ QCHECK_GT(space, 0ul) << "space must be greater than 0.";
const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";
@@ -67,12 +71,16 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); }
void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
uint32_t flags) {
+ if (size == 0) return nullptr;
// WAR for allocator alignment requirement. Certain cuda API calls require GPU
// memory with alignment to cudaDeviceProp::textureAlignment.
// See issue #20856
alignment = 512;
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
- size_t total_size = size + alignment;
+ uint64_t total_size = size + alignment;
+ // TODO(aaroey): AllocateRaw takes size_t size as input, so it'll produce
+ // unexpected result when TRT tries to allocate more bytes than size_t can
+ // carry. Fix this.
void* mem = allocator_->AllocateRaw(alignment, total_size);
if (!mem) return nullptr;
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
index 6f94492083..dc9862b16c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
@@ -29,7 +29,7 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this function mimics its behavior.
-void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
+void* Align(uint64_t alignment, uint64_t size, void*& ptr, uint64_t& space);
} // namespace tensorrt
} // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index f515ed03f2..ad6b1d7d4c 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -20,11 +20,11 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
-bool RunTest(const size_t alignment, const size_t size,
- const intptr_t orig_ptr_val, const size_t orig_space) {
+bool RunTest(const uint64_t alignment, const uint64_t size,
+ const intptr_t orig_ptr_val, const uint64_t orig_space) {
void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
void* ptr = orig_ptr;
- size_t space = orig_space;
+ uint64_t space = orig_space;
void* result = Align(alignment, size, ptr, space);
if (result == nullptr) {
EXPECT_EQ(orig_ptr, ptr);
@@ -43,24 +43,25 @@ bool RunTest(const size_t alignment, const size_t size,
}
TEST(TRTAllocatorTest, Align) {
- for (const size_t space :
- {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
- for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
- for (const intptr_t ptr_val :
+ for (const uint64_t space :
+ {1ul, 2ul, 3ul, 4ul, 7ul, 8ul, 9ul, 10ul, 16ul, 32ul, 511ul, 512ul,
+ 513ul, 700ul, 12345ul, 1ul << 32}) {
+ for (uint64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
+ for (const uintptr_t ptr_val :
{1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
alignment + (alignment / 2)}) {
if (ptr_val % alignment == 0) {
- for (const size_t size :
+ for (const uint64_t size :
{1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
}
} else {
EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
- const size_t diff = alignment - ptr_val % alignment;
+ const uint64_t diff = alignment - ptr_val % alignment;
if (space > diff) {
EXPECT_TRUE(
RunTest(alignment, space - diff, ptr_val + diff, space - diff));
- for (const size_t size :
+ for (const uint64_t size :
{1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
space - diff + 1, space - 1}) {
EXPECT_EQ(space - diff >= size,
diff --git a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
index 62f4e525f7..d2f65344da 100644
--- a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
+++ b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
@@ -144,14 +144,6 @@ class BiasaddMatMulTest(trt_test.TfTrtIntegrationTestBase):
# mode, which is a bug. Re-enable this when trt library is fixed.
return not trt_test.IsQuantizationMode(run_params.precision_mode)
- def ExpectedAbsoluteTolerance(self, run_params):
- """The absolute tolerance to compare floating point results."""
- return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03
-
- def ExpectedRelativeTolerance(self, run_params):
- """The relative tolerance to compare floating point results."""
- return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03
-
if __name__ == "__main__":
test.main()
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
index 699f79adec..4f935a7665 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py
@@ -134,7 +134,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
dims[0] for dims in self._GetParamsCached().input_dims if len(dims)
]),
max_workspace_size_bytes=1 << 25,
- precision_mode=self._ToBytes(run_params.precision_mode),
+ precision_mode=run_params.precision_mode,
minimum_segment_size=2,
is_dynamic_op=run_params.dynamic_engine,
maximum_cached_engines=1,