#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_
#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "tensorflow/stream_executor/stream_executor.h"
#include "tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h"
#include "tensorflow/core/common_runtime/gpu/visitable_allocator.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/platform/thread_annotations.h"

namespace tensorflow {

class GPURegionAllocator : public VisitableAllocator {
 public:
  // 'device_id' must be a valid device on the machine.
  //
  // 'total_bytes' is the maximum number of bytes this allocator may
  // allocate.  This may be less than the total available on the device.
  explicit GPURegionAllocator(int device_id, size_t total_bytes);
  ~GPURegionAllocator() override;

  string Name() override { return "gpu_region"; }
  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
  void DeallocateRaw(void* ptr) override;
  void AddAllocVisitor(Visitor visitor) override;
  // Does nothing, because regions are never freed.
  void AddFreeVisitor(Visitor visitor) override {}

  bool TracksAllocationSizes() override;
  size_t RequestedSize(void* ptr) override;
  size_t AllocatedSize(void* ptr) override;

 private:
  // A Chunk is the header on a single piece of memory given back
  // in response to an AllocateRaw() call.
  struct Chunk {
    char* ptr;               // Pointer to granted GPU buffer.
    size_t size;             // Full size of GPU buffer.
    size_t bytes_allocated;  // Bytes asked for by client.
    bool in_use;
    Chunk* prev;  // Used for chaining in pool.
    Chunk* next;
    Chunk()
        : ptr(nullptr),
          size(0),
          bytes_allocated(0),
          in_use(false),
          prev(nullptr),
          next(nullptr) {}
  };

  // A Pool is a collection of same-sized Chunks.
  struct Pool {
    int num_chunks;             // Total chunks in this pool.
    int num_free;               // Total free chunks in this pool.
    int64 cumulative_malloced;  // Number of chunks malloced so far.
    int64 cumulative_freed;     // Number of chunks freed so far.

    // Doubly-linked ring of chunks; all free chunks precede all
    // granted chunks.
    Chunk* first;
    Chunk* last;
    Pool()
        : num_chunks(0),
          num_free(0),
          cumulative_malloced(0),
          cumulative_freed(0),
          first(nullptr),
          last(nullptr) {}

    string ToString() const {
      return strings::StrCat("chunks: ", num_chunks, " free: ", num_free,
                             " cumulative malloc: ", cumulative_malloced,
                             " cumulative freed: ", cumulative_freed);
    }
  };

  // A Region is a single area of GPU memory that has been
  // reserved by this class and carved up into Chunks.
  struct Region {
    char* ptr;   // Base GPU ptr.
    char* next;  // Frontier of unused part of region.
    size_t size;
    Region() : ptr(nullptr), next(nullptr), size(0) {}
  };

  // Calculates the size of the chunk to hand out for an allocation
  // of 'bytes' bytes.  The minimum chunk size is 16, for alignment.
  // Larger sizes are rounded up somewhat so there are fewer
  // size-specific pools.
  static size_t ChunkSize(size_t bytes);

  void* AllocateRawInternal(size_t alignment, size_t num_bytes,
                            bool dump_log_on_failure);
  void DeallocateRawInternal(void* ptr);

  bool ExpandPool(Pool* p, size_t chunk_size, size_t requested_size,
                  bool dump_log_on_failure) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Inspects region maps and crashes with debug information if there
  // are any memory leaks as detected by the region allocator.
  void CheckForMemoryLeaks() LOCKS_EXCLUDED(lock_);

  void DumpMemoryLog() EXCLUSIVE_LOCKS_REQUIRED(lock_);

  perftools::gputools::StreamExecutor* stream_exec_;  // Not owned.
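
  // One plausible shape for the ChunkSize() rounding policy declared
  // above -- an illustrative sketch only, not the actual definition,
  // which lives in gpu_region_allocator.cc:
  //
  //   static size_t ChunkSize(size_t bytes) {
  //     if (bytes <= 16) return 16;  // Minimum chunk size, for alignment.
  //     // Round up to the next power of two, so allocations of similar
  //     // sizes share a pool instead of each getting its own.
  //     size_t chunk_size = 16;
  //     while (chunk_size < bytes) chunk_size <<= 1;
  //     return chunk_size;
  //   }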
  typedef std::unordered_map<size_t, Pool> PoolMap;
  typedef std::unordered_map<const void*, Chunk*> ChunkMap;

  GPUAllocatorRetry retry_helper_;
  mutable mutex lock_;

  // Maps from chunk size to the Pool of chunks of that size.
  PoolMap pools_ GUARDED_BY(lock_);

  // Owns regions.
  std::vector<Region*> regions_ GUARDED_BY(lock_);

  // Maps from GPU ptr to the Chunk owning it.
  //
  // Owns chunks.
  ChunkMap chunk_map_ GUARDED_BY(lock_);

  // Called once on each region, ASAP.
  std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);

  const int device_id_;

  // Total amount of memory (in bytes) available to this allocator.
  const size_t total_bytes_;

  // Total amount of memory allocated to regions.
  size_t allocated_memory_ = 0;

  size_t region_size_ = 0;

  TF_DISALLOW_COPY_AND_ASSIGN(GPURegionAllocator);
};

}  // namespace tensorflow

#endif  // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_
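
// Illustrative usage (a minimal sketch; device 0 and the byte counts
// below are assumptions made up for the example, not values this
// header prescribes):
//
//   tensorflow::GPURegionAllocator allocator(/*device_id=*/0,
//                                            /*total_bytes=*/1 << 30);
//   void* buf = allocator.AllocateRaw(/*alignment=*/32, /*num_bytes=*/1024);
//   // ... use the GPU buffer; its chunk is at least 1024 bytes ...
//   allocator.DeallocateRaw(buf);  // Returns the chunk to its pool.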