aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
blob: d8f97bfbbc7adb10a5dda6fbc2f7a660f6cd7742 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"

#include "tensorflow/core/platform/logging.h"

#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#include "cuda/include/cuda_runtime_api.h"
#endif  // GOOGLE_TENSORRT
#endif  // GOOGLE_CUDA

namespace tensorflow {
namespace tensorrt {

// std::align is not supported, so this method mimic its behavior.
void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
  QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
  QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
  QCHECK_GT(size, 0) << "size must be greater than 0.";
  QCHECK(ptr) << "ptr must not be nullptr.";
  QCHECK_GT(space, 0) << "space must be greater than 0.";
  const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
  QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";

  if (size > space) return nullptr;
  const uintptr_t aligned_ptr_val = ((ptr_val + alignment - 1) & -alignment);
  if (aligned_ptr_val > ptr_val + space - size) return nullptr;
  ptr = reinterpret_cast<void*>(aligned_ptr_val);
  const uintptr_t diff = aligned_ptr_val - ptr_val;
  space -= diff;
  return ptr;
}

}  // namespace tensorrt
}  // namespace tensorflow

#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR > 2

namespace tensorflow {
namespace tensorrt {

void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment,
                                 uint32_t flags) {
  assert((alignment & (alignment - 1)) == 0);  // zero or a power of 2.
  void* memory;
  cudaMalloc(&memory, size);
  return memory;
}

void TRTCudaAllocator::free(void* memory) { cudaFree(memory); }

void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
                                   uint32_t flags) {
  // WAR for allocator alignment requirement. Certain cuda API calls require GPU
  // memory with alignemtn to cudaDeviceProp::textureAlignment.
  // See issue #20856
  alignment = 512;
  assert((alignment & (alignment - 1)) == 0);  // zero or a power of 2.
  size_t total_size = size + alignment;
  void* mem = allocator_->AllocateRaw(alignment, total_size);
  if (!mem) return nullptr;

  void* alloc_mem = mem;
  QCHECK(Align(alignment, size, mem, total_size));
  if (mem != alloc_mem) {
    QCHECK(mem_map_.insert({mem, alloc_mem}).second);
  }
  VLOG(2) << "Allocated " << total_size << " bytes memory @" << alloc_mem
          << "; aligned to " << size << " bytes @" << mem << " with alignment "
          << alignment;
  return mem;
}

TRTDeviceAllocator::TRTDeviceAllocator(tensorflow::Allocator* allocator)
    : allocator_(allocator) {
  VLOG(1) << "Using " << allocator->Name() << " allocator from TensorFlow";
}

void TRTDeviceAllocator::free(void* memory) {
  VLOG(2) << "Deallocating @ " << memory;
  // allocated memory adjusted for alignment, restore the original pointer
  if (memory) {
    auto alloc_mem = mem_map_.find(memory);
    if (alloc_mem != mem_map_.end()) {
      memory = alloc_mem->second;
      mem_map_.erase(alloc_mem->first);
    }
    allocator_->DeallocateRaw(memory);
  }
}

}  // namespace tensorrt
}  // namespace tensorflow

#endif
#endif  // GOOGLE_TENSORRT
#endif  // GOOGLE_CUDA