1 files changed, 146 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h
new file mode 100644
index 0000000000..1a250b6ede
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h
@@ -0,0 +1,146 @@
+#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_
+#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/stream_executor/stream_executor.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h"
+#include "tensorflow/core/common_runtime/gpu/visitable_allocator.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+
+namespace tensorflow {
+
+// Allocator that reserves GPU memory in Regions, carves each Region into
+// Chunks, and groups same-sized Chunks into Pools.  The pool, region, chunk
+// and visitor containers are annotated as guarded by lock_.
+class GPURegionAllocator : public VisitableAllocator {
+ public:
+  // 'device_id' must be a valid device on the machine.
+  //
+  // 'total_bytes' is the maximum number of bytes this allocator should
+  // allocate.  This may be less than the total available.
+  explicit GPURegionAllocator(int device_id, size_t total_bytes);
+  ~GPURegionAllocator() override;
+
+  string Name() override { return "gpu_region"; }
+  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
+  void DeallocateRaw(void* ptr) override;
+  void AddAllocVisitor(Visitor visitor) override;
+  // Does nothing, because regions are never freed.
+  void AddFreeVisitor(Visitor visitor) override {}
+
+  bool TracksAllocationSizes() override;
+  size_t RequestedSize(void* ptr) override;
+  size_t AllocatedSize(void* ptr) override;
+
+ private:
+  // A Chunk is the header on a single piece of memory given back
+  // in response to an AllocateRaw() call.
+  struct Chunk {
+    char* ptr = nullptr;         // Pointer to granted GPU buffer.
+    size_t size = 0;             // Full size of GPU buffer.
+    size_t bytes_allocated = 0;  // Bytes asked for by client.
+    bool in_use = false;
+    Chunk* prev = nullptr;  // Used for chaining in pool.
+    Chunk* next = nullptr;
+  };
+
+  // A Pool is a collection of same-sized Chunks.
+  struct Pool {
+    int num_chunks = 0;             // Total chunks in this pool.
+    int num_free = 0;               // Total free chunks in this pool.
+    int64 cumulative_malloced = 0;  // Number of chunks malloced so far.
+    int64 cumulative_freed = 0;     // Number of chunks freed so far.
+
+    // Doubly-linked ring of chunks; all free chunks precede all
+    // granted chunks.
+    Chunk* first = nullptr;
+    Chunk* last = nullptr;
+
+    // Returns a one-line summary of this pool's counters.
+    string ToString() const {
+      return strings::StrCat("chunks: ", num_chunks, " free: ", num_free,
+                             " cumulative malloc: ", cumulative_malloced,
+                             " cumulative freed: ", cumulative_freed);
+    }
+  };
+
+  // A Region is a single area of GPU memory that has been
+  // reserved by this class and carved up into Chunks.
+  //
+  // All members have in-class initializers so that a default-constructed
+  // Region never carries an indeterminate 'next' frontier pointer.
+  struct Region {
+    char* ptr = nullptr;   // Base GPU ptr.
+    char* next = nullptr;  // Frontier of unused part of region.
+    size_t size = 0;       // Presumably the region's size in bytes -- confirm.
+  };
+
+  // Calculate size of chunk for an allocation of this size.
+  // Min chunk size is 16, for alignment.
+  // For larger sizes, we round up somewhat so there are fewer
+  // size-specific pools.
+  static size_t ChunkSize(size_t bytes);
+
+  // NOTE(review): presumably the implementations that AllocateRaw() and
+  // DeallocateRaw() delegate to; confirm against the .cc file.
+  void* AllocateRawInternal(size_t alignment, size_t num_bytes,
+                            bool dump_log_on_failure);
+  void DeallocateRawInternal(void* ptr);
+
+  // Caller must hold lock_.  NOTE(review): presumably adds chunks of
+  // 'chunk_size' to '*p' and returns false on failure -- confirm in the .cc.
+  bool ExpandPool(Pool* p, size_t chunk_size, size_t requested_size,
+                  bool dump_log_on_failure) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Inspects region maps and crashes with debug information if there
+  // are any memory leaks as detected by the region allocator.
+  void CheckForMemoryLeaks() LOCKS_EXCLUDED(lock_);
+
+  // Caller must hold lock_.
+  void DumpMemoryLog() EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  perftools::gputools::StreamExecutor* stream_exec_;  // Not owned.
+
+  typedef std::unordered_map<size_t, Pool> PoolMap;
+  typedef std::unordered_map<void*, Chunk*> ChunkMap;
+
+  GPUAllocatorRetry retry_helper_;
+  mutable mutex lock_;
+  // Keyed by size_t; presumably the chunk size shared by the pool -- confirm.
+  PoolMap pools_ GUARDED_BY(lock_);
+
+  // Owns regions.
+  std::vector<Region*> regions_ GUARDED_BY(lock_);
+
+  // Maps from GPU ptr to Chunk owning it.
+  //
+  // Owns chunks.
+  ChunkMap chunk_map_ GUARDED_BY(lock_);
+
+  // Called once on each region, ASAP.
+  std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);
+
+  const int device_id_;
+
+  // Total amount of memory (in bytes) available to this Allocator
+  const size_t total_bytes_;
+
+  // Total amount of memory allocated to regions.
+  size_t allocated_memory_ = 0;
+
+  // NOTE(review): neither allocated_memory_ nor region_size_ is annotated
+  // GUARDED_BY(lock_); confirm how access to them is synchronized.
+  size_t region_size_ = 0;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GPURegionAllocator);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_