diff options
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_region_allocator.h')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_region_allocator.h | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h new file mode 100644 index 0000000000..1a250b6ede --- /dev/null +++ b/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h @@ -0,0 +1,146 @@ +#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_ +#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_ + +#include <memory> +#include <string> +#include <unordered_map> +#include <vector> + +#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h" +#include "tensorflow/core/common_runtime/gpu/visitable_allocator.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/platform/thread_annotations.h" + +namespace tensorflow { + +class GPURegionAllocator : public VisitableAllocator { + public: + // 'device_id' must be a valid device on the machine. + // + // total_bytes is how many bytes this allocator should allocate up + // to. This may be less than the total available. + explicit GPURegionAllocator(int device_id, size_t total_bytes); + ~GPURegionAllocator() override; + + string Name() override { return "gpu_region"; } + void* AllocateRaw(size_t alignment, size_t num_bytes) override; + void DeallocateRaw(void* ptr) override; + void AddAllocVisitor(Visitor visitor) override; + // Does nothing, because regions are never freed. + void AddFreeVisitor(Visitor visitor) override {} + + bool TracksAllocationSizes() override; + size_t RequestedSize(void* ptr) override; + size_t AllocatedSize(void* ptr) override; + + private: + // A Chunk is the header on a single piece of memory given back + // in response to an AllocateRaw() call. + struct Chunk { + char* ptr; // pointer to granted GPU buffer. + size_t size; // Full size of GPU buffer. + size_t bytes_allocated; // Bytes asked for by client. + bool in_use; + Chunk* prev; // Used for chaining in pool. + Chunk* next; + Chunk() + : ptr(nullptr), + size(0), + bytes_allocated(0), + in_use(false), + prev(nullptr), + next(nullptr) {} + }; + + // A Pool is a collection of same-sized Chunks. + struct Pool { + int num_chunks; // total chunks in this pool + int num_free; // total free chunks in this pool + int64 cumulative_malloced; // number of chunks malloced so far + int64 cumulative_freed; // number of chunks freed so far + + // double-linked ring of chunks; all free chunks precede all + // granted chunks + Chunk* first; + Chunk* last; + Pool() + : num_chunks(0), + num_free(0), + cumulative_malloced(0), + cumulative_freed(0), + first(nullptr), + last(nullptr) {} + + string ToString() const { + return strings::StrCat("chunks: ", num_chunks, " free: ", num_free, + " cumulative malloc: ", cumulative_malloced, + " cumulative freed: ", cumulative_freed); + } + }; + + // A Region is a single area of GPU memory that has been + // reserved by this class and carved up into Chunks. + struct Region { + char* ptr; // base GPU ptr + char* next; // frontier of unused part of region + size_t size; + Region() : ptr(nullptr), size(0) {} + }; + + // Calculate size of chunk for an allocation of this size. + // Min chunk size is 16, for alignment. + // For larger sizes, we round up somewhat so there are fewer + // size-specific pools. + static size_t ChunkSize(size_t bytes); + + void* AllocateRawInternal(size_t alignment, size_t num_bytes, + bool dump_log_on_failure); + void DeallocateRawInternal(void* ptr); + + bool ExpandPool(Pool* p, size_t chunk_size, size_t requested_size, + bool dump_log_on_failure) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // Inspects region maps and crashes with debug information if there + // are any memory leaks as detected by the region allocator. + void CheckForMemoryLeaks() LOCKS_EXCLUDED(lock_); + + void DumpMemoryLog() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + perftools::gputools::StreamExecutor* stream_exec_; // Not owned. + + typedef std::unordered_map<size_t, Pool> PoolMap; + typedef std::unordered_map<void*, Chunk*> ChunkMap; + + GPUAllocatorRetry retry_helper_; + mutable mutex lock_; + PoolMap pools_ GUARDED_BY(lock_); + + // Owns regions. + std::vector<Region*> regions_ GUARDED_BY(lock_); + + // Maps from GPU ptr to Chunk owning it. + // + // Owns chunks. + ChunkMap chunk_map_ GUARDED_BY(lock_); + + // Called once on each region, ASAP. + std::vector<Visitor> region_visitors_ GUARDED_BY(lock_); + + const int device_id_; + + // Total amount of memory (in bytes) available to this Allocator + const size_t total_bytes_; + + // Total amount of memory allocated to regions. + size_t allocated_memory_ = 0; + + size_t region_size_ = 0; + + TF_DISALLOW_COPY_AND_ASSIGN(GPURegionAllocator); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_ |