aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/contrib/tensorrt/resources/trt_allocator.cc')
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_allocator.cc65
1 files changed, 58 insertions, 7 deletions
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index 9f115990c3..d8f97bfbbc 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -19,12 +19,42 @@ limitations under the License.
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
+#include "cuda/include/cuda_runtime_api.h"
+#endif // GOOGLE_TENSORRT
+#endif // GOOGLE_CUDA
+
+namespace tensorflow {
+namespace tensorrt {
+
+// std::align is not supported, so this method mimics its behavior.
+void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
+ QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
+ QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
+ QCHECK_GT(size, 0) << "size must be greater than 0.";
+ QCHECK(ptr) << "ptr must not be nullptr.";
+ QCHECK_GT(space, 0) << "space must be greater than 0.";
+ const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
+ QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";
+ if (size > space) return nullptr;
+ const uintptr_t aligned_ptr_val = ((ptr_val + alignment - 1) & -alignment);
+ if (aligned_ptr_val > ptr_val + space - size) return nullptr;
+ ptr = reinterpret_cast<void*>(aligned_ptr_val);
+ const uintptr_t diff = aligned_ptr_val - ptr_val;
+ space -= diff;
+ return ptr;
+}
+
+} // namespace tensorrt
+} // namespace tensorflow
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR > 2
-#include "cuda/include/cuda_runtime_api.h"
namespace tensorflow {
namespace tensorrt {
+
void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment,
uint32_t flags) {
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
@@ -37,10 +67,23 @@ void TRTCudaAllocator::free(void* memory) { cudaFree(memory); }
void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
uint32_t flags) {
+ // WAR for allocator alignment requirement. Certain CUDA API calls require
+ // GPU memory aligned to cudaDeviceProp::textureAlignment.
+ // See issue #20856
+ alignment = 512;
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
- void* mem = allocator_->AllocateRaw(alignment, size);
- VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment
- << " @ " << mem;
+ size_t total_size = size + alignment;
+ void* mem = allocator_->AllocateRaw(alignment, total_size);
+ if (!mem) return nullptr;
+
+ void* alloc_mem = mem;
+ QCHECK(Align(alignment, size, mem, total_size));
+ if (mem != alloc_mem) {
+ QCHECK(mem_map_.insert({mem, alloc_mem}).second);
+ }
+ VLOG(2) << "Allocated " << total_size << " bytes memory @" << alloc_mem
+ << "; aligned to " << size << " bytes @" << mem << " with alignment "
+ << alignment;
return mem;
}
@@ -51,12 +94,20 @@ TRTDeviceAllocator::TRTDeviceAllocator(tensorflow::Allocator* allocator)
void TRTDeviceAllocator::free(void* memory) {
VLOG(2) << "Deallocating @ " << memory;
- allocator_->DeallocateRaw(memory);
+ // If the allocation was adjusted for alignment, restore the original pointer.
+ if (memory) {
+ auto alloc_mem = mem_map_.find(memory);
+ if (alloc_mem != mem_map_.end()) {
+ memory = alloc_mem->second;
+ mem_map_.erase(alloc_mem->first);
+ }
+ allocator_->DeallocateRaw(memory);
+ }
}
} // namespace tensorrt
} // namespace tensorflow
#endif
-#endif
-#endif
+#endif // GOOGLE_TENSORRT
+#endif // GOOGLE_CUDA