diff options
Diffstat (limited to 'tensorflow/contrib/tensorrt/resources/trt_allocator.h')
-rw-r--r-- | tensorflow/contrib/tensorrt/resources/trt_allocator.h | 31 |
1 files changed, 28 insertions, 3 deletions
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h index c5d2cec730..6f94492083 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -16,13 +16,25 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ #define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include <unordered_map> + #include "tensorflow/core/framework/allocator.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT #include "tensorrt/include/NvInfer.h" +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +namespace tensorflow { +namespace tensorrt { +// std::align is not supported, so this function mimic its behavior. +void* Align(size_t alignment, size_t size, void*& ptr, size_t& space); +} // namespace tensorrt +} // namespace tensorflow +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT #if NV_TENSORRT_MAJOR == 3 // Define interface here temporarily until TRT 4.0 is released namespace nvinfer1 { @@ -37,7 +49,14 @@ class IGpuAllocator { namespace tensorflow { namespace tensorrt { -class TRTCudaAllocator : public nvinfer1::IGpuAllocator { +class TRTBaseAllocator : public nvinfer1::IGpuAllocator { + // Base allocator class so we can have a virtual destructor; + public: + // python wrapper seems to be not happy with an pure virtual destructor; + virtual ~TRTBaseAllocator() = default; +}; + +class TRTCudaAllocator : public TRTBaseAllocator { // Allocator implementation that is using cuda allocator instead of device // allocator in case we can't get device allocator from TF. public: @@ -47,10 +66,13 @@ class TRTCudaAllocator : public nvinfer1::IGpuAllocator { void free(void* memory) override; }; -class TRTDeviceAllocator : public nvinfer1::IGpuAllocator { +class TRTDeviceAllocator : public TRTBaseAllocator { // Allocator implementation wrapping TF device allocators. public: TRTDeviceAllocator(tensorflow::Allocator* allocator); + + // TODO(aaroey): base class doesn't have a virtual destructor, work with + // Nvidia to fix it. virtual ~TRTDeviceAllocator() { VLOG(1) << "Destroying allocator attached to " << allocator_->Name(); } @@ -59,6 +81,9 @@ class TRTDeviceAllocator : public nvinfer1::IGpuAllocator { private: tensorflow::Allocator* allocator_; + + // supporting alignment from allocation request requires a map to free; + std::unordered_map<void*, void*> mem_map_; }; } // namespace tensorrt |