author    Xiaoqiang Zheng <zhengxq@google.com>    2016-03-18 14:34:33 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>    2016-03-18 15:47:15 -0700
commit 2b7ca5cb7e369afd69440e924e6e9d15854cb6ec (patch)
tree   803a49a2f2097e4f2cc89aeb199d973c63b44b48 /tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
parent 05ea40f180e528dbfde36cd338a0b6ac3cca6dd9 (diff)
Change CUDA pinned memory allocation to BFC allocator.
Move the GPU-neutral code to common_runtime.
Change: 117591254
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc')
-rw-r--r--   tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc   689
1 file changed, 4 insertions(+), 685 deletions(-)
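
The net effect of the patch below: the best-fit-with-coalescing (BFC) bookkeeping that previously lived in this GPU-specific file moves to a GPU-neutral BFCAllocator in common_runtime, and GPUBFCAllocator shrinks to a constructor that plugs a GPU memory back end into it. A minimal sketch of that seam follows; the interface and names are editorial assumptions for illustration, since the diff shows only the call site, not the moved code.

// Editorial sketch, not part of the patch: BFCAllocator keeps all
// chunk/bin bookkeeping and asks a back end only for large raw regions.
#include <cstddef>

class SubAllocatorSketch {
 public:
  virtual ~SubAllocatorSketch() {}
  // Allocate one large region that the BFC layer will carve into chunks.
  virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
  virtual void Free(void* ptr, size_t num_bytes) = 0;
};

// A GPU back end would forward to StreamExecutor device allocation; a CUDA
// pinned-host back end (the commit title) would forward to page-locked host
// allocation. The same BFC machinery sits on top of either.
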
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 03507cd948..33496154ec 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -15,17 +15,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
-#include "tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
-#include "tensorflow/core/lib/core/bits.h"
-#include "tensorflow/core/lib/gtl/stl_util.h"
-#include "tensorflow/core/lib/strings/numbers.h"
-#include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/stream_executor.h"
-#include "tensorflow/core/platform/types.h"
namespace gpu = ::perftools::gputools;
@@ -36,680 +26,9 @@ GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory)
GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory,
const GPUOptions& gpu_options)
- : device_id_(device_id),
- free_chunks_list_(kInvalidChunkHandle),
- next_allocation_id_(1) {
- // Get a pointer to the stream_executor for this device
- stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
-
- if (gpu_options.allow_growth()) {
- // 1MiB smallest initial allocation, unless total memory available
- // is less.
- curr_region_allocation_bytes_ =
- RoundedBytes(std::min(total_memory, size_t{1048576}));
- } else {
- curr_region_allocation_bytes_ = RoundedBytes(total_memory);
- }
-
- // Allocate the requested amount of memory.
- gpu_memory_size_ = total_memory;
- stats_.bytes_limit = static_cast<int64>(total_memory);
-
- // Create a bunch of bins of various good sizes.
-
- // We create bins to fit all possible ranges that cover the
- // gpu_memory_size_ starting from allocations up to 256 bytes to
- // allocations up to (and including) the memory limit.
- for (BinNum b = 0; b < kNumBins; b++) {
- size_t bin_size = BinNumToSize(b);
- VLOG(1) << "Creating bin of max chunk size "
- << strings::HumanReadableNumBytes(bin_size);
- new (BinFromIndex(b)) Bin(this, bin_size);
- CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
- CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
- CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
- if (b + 1 < kNumBins) {
- CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
- }
- }
-}
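
The CHECKs in the constructor above pin down the bin geometry: bin b accepts chunk sizes in the half-open range [BinNumToSize(b), 2 * BinNumToSize(b)), with the smallest bin at 256 bytes. A runnable sketch of that mapping, assuming BinNumToSize(b) == 256 << b (consistent with the checks; the actual helpers are defined in the header, not in this diff):

// Editorial sketch of the assumed bin size progression: 256, 512, 1024, ...
#include <cstddef>
#include <cstdio>

static size_t BinNumToSizeSketch(int b) { return static_cast<size_t>(256) << b; }

// Smallest bin whose range [256 << b, 256 << (b + 1)) contains `bytes`.
static int BinNumForSizeSketch(size_t bytes) {
  int b = 0;
  while (BinNumToSizeSketch(b + 1) <= bytes) ++b;
  return b;
}

int main() {
  std::printf("%d %d %d\n",
              BinNumForSizeSketch(256),    // 0: first bin starts at 256
              BinNumForSizeSketch(511),    // 0: bin_size + 255 stays in bin 0
              BinNumForSizeSketch(512));   // 1: bin_size * 2 moves to bin 1
  return 0;
}
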
-
-GPUBFCAllocator::~GPUBFCAllocator() {
- // Return memory back.
- VLOG(2) << "Number of regions allocated: "
- << region_manager_.regions().size();
- for (const auto& region : region_manager_.regions()) {
- gpu::DeviceMemoryBase gpu_ptr{region.ptr()};
- stream_exec_->Deallocate(&gpu_ptr);
- }
-
- for (BinNum b = 0; b < kNumBins; b++) {
- BinFromIndex(b)->~Bin();
- }
-}
-
-GPUBFCAllocator::Chunk* GPUBFCAllocator::ChunkFromHandle(ChunkHandle h) {
- DCHECK_GE(h, 0);
- DCHECK_LT(h, static_cast<int>(chunks_.size()));
- return &(chunks_[h]);
-}
-
-bool GPUBFCAllocator::Extend(size_t rounded_bytes) {
- // Do we have enough space to handle the client's request?
- // If not, fail immediately.
- if (total_region_allocated_bytes_ + rounded_bytes > gpu_memory_size_) {
- return false;
- }
-
- // If curr_region_allocation_bytes_ is not enough to satisfy the
- // allocation, keep multiplying by a power of two until that is
- // sufficient.
- bool increased_allocation = false;
- while (rounded_bytes > curr_region_allocation_bytes_) {
- curr_region_allocation_bytes_ *= 2;
- increased_allocation = true;
- }
-
- // Try allocating.
- size_t bytes = curr_region_allocation_bytes_;
- gpu::DeviceMemory<char> gpu_mem = stream_exec_->AllocateArray<char>(bytes);
- if (gpu_mem == nullptr && !started_backpedal_) {
- // Only backpedal once.
- started_backpedal_ = true;
-
- static constexpr float kBackpedalFactor = 0.9;
-
- // Try allocating less memory.
- bytes = RoundedBytes(bytes * kBackpedalFactor);
- while (gpu_mem == nullptr && bytes > rounded_bytes) {
- gpu_mem = stream_exec_->AllocateArray<char>(bytes);
- bytes = RoundedBytes(bytes * kBackpedalFactor);
- }
- }
-
- if (gpu_mem == nullptr) {
- return false;
- }
-
- if (!increased_allocation) {
- // Increase the region size of the next required allocation.
- curr_region_allocation_bytes_ *= 2;
- }
-
- VLOG(1) << "Extending allocation by " << strings::HumanReadableNumBytes(bytes)
- << " bytes.";
-
- total_region_allocated_bytes_ += bytes;
- VLOG(1) << "Total allocated bytes: "
- << strings::HumanReadableNumBytes(total_region_allocated_bytes_);
-
- void* gpu_mem_base = gpu_mem.opaque();
- VLOG(1) << "Allocated memory at " << gpu_mem_base << " to "
- << static_cast<void*>(static_cast<char*>(gpu_mem_base) + bytes);
- region_manager_.AddAllocationRegion(gpu_mem_base, bytes);
-
- // Create one large chunk for the whole memory space that will
- // be chunked later.
- ChunkHandle h = AllocateChunk();
- GPUBFCAllocator::Chunk* c = ChunkFromHandle(h);
- c->ptr = gpu_mem_base;
- c->size = bytes;
- c->allocation_id = -1;
- c->prev = kInvalidChunkHandle;
- c->next = kInvalidChunkHandle;
-
- region_manager_.set_handle(c->ptr, h);
-
- // TODO(vrv): Try to merge this new region with an existing region,
- // if the address space is contiguous, to avoid fragmentation
- // across regions.
-
- // Insert the chunk into the right bin.
- InsertFreeChunkIntoBin(h);
-
- // Invoke visitors on newly allocated region.
- for (auto visitor : region_visitors_) {
- visitor(gpu_mem_base, bytes);
- }
- return true;
-}
-
-GPUBFCAllocator::ChunkHandle GPUBFCAllocator::AllocateChunk() {
- if (free_chunks_list_ != kInvalidChunkHandle) {
- ChunkHandle h = free_chunks_list_;
- Chunk* c = ChunkFromHandle(h);
- free_chunks_list_ = c->next;
- return h;
- } else {
- ChunkHandle h = chunks_.size();
- chunks_.resize(h + 1);
- return h;
- }
-}
-
-void GPUBFCAllocator::DeallocateChunk(ChunkHandle h) {
- Chunk* c = ChunkFromHandle(h);
- c->next = free_chunks_list_;
- free_chunks_list_ = h;
-}
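
AllocateChunk and DeallocateChunk above implement a vector-backed free list: Chunk records live in chunks_, handles are indices, and released slots are threaded through the next field. Handles stay valid when the vector reallocates, which is why callers re-derive pointers via ChunkFromHandle after any call that may grow chunks_ (FindChunkPtr below does exactly this after SplitChunk). A standalone sketch of the idiom:

// Editorial sketch of the handle-plus-freelist pattern used above.
#include <cstddef>
#include <vector>

struct Slot {
  size_t next;  // doubles as the free-list link while the slot is unused
};
constexpr size_t kInvalidHandle = static_cast<size_t>(-1);

struct HandlePool {
  std::vector<Slot> slots;
  size_t free_head = kInvalidHandle;

  size_t Allocate() {
    if (free_head != kInvalidHandle) {  // reuse a recycled slot first
      size_t h = free_head;
      free_head = slots[h].next;
      return h;
    }
    slots.push_back(Slot{});            // otherwise grow the backing vector
    return slots.size() - 1;
  }

  void Release(size_t h) {              // push the slot onto the free list
    slots[h].next = free_head;
    free_head = h;
  }
};
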
-
-void* GPUBFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes) {
- // Fast path: Try once to allocate without getting the retry_helper_ involved
- void* r = AllocateRawInternal(unused_alignment, num_bytes, false);
- if (r != nullptr) {
- return r;
- } else {
- static const int64 kMaxMillisToWait = 10000; // 10 seconds
- return retry_helper_.AllocateRaw(
- [this](size_t a, size_t nb, bool v) {
- return AllocateRawInternal(a, nb, v);
- },
- kMaxMillisToWait, unused_alignment, num_bytes);
- }
-}
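
The fast path above tries a single allocation without the retry machinery; on failure, retry_helper_ re-invokes the same internal routine until memory frees up or 10 seconds elapse, and DeallocateRaw (further down) wakes waiters through NotifyDealloc(). A minimal sketch of that contract, assuming a condition-variable implementation; GPUAllocatorRetry itself is outside this diff:

// Editorial sketch of the assumed retry contract.
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <functional>
#include <mutex>

class AllocatorRetrySketch {
 public:
  // Re-run `alloc` until it returns non-null or `max_millis` elapses. The
  // final attempt passes true for the bool parameter, mirroring the
  // dump_log_on_failure fallback in the code above.
  void* AllocateRaw(const std::function<void*(size_t, size_t, bool)>& alloc,
                    int max_millis, size_t alignment, size_t num_bytes) {
    const auto deadline = std::chrono::steady_clock::now() +
                          std::chrono::milliseconds(max_millis);
    std::unique_lock<std::mutex> l(mu_);
    for (;;) {
      void* ptr = alloc(alignment, num_bytes, /*dump_log_on_failure=*/false);
      if (ptr != nullptr) return ptr;
      if (cv_.wait_until(l, deadline) == std::cv_status::timeout) {
        return alloc(alignment, num_bytes, /*dump_log_on_failure=*/true);
      }
    }
  }

  // Called on deallocation: freed memory may let a blocked request fit.
  void NotifyDealloc() { cv_.notify_all(); }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
};
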
-
-void* GPUBFCAllocator::AllocateRaw(
- size_t unused_alignment, size_t num_bytes,
- const AllocationAttributes& allocation_attr) {
- if (allocation_attr.no_retry_on_failure) {
- // Return immediately upon the first failure if this is for allocating an
- // optional scratch space.
- void* result = AllocateRawInternal(unused_alignment, num_bytes, false);
- if (result == nullptr) {
-      // The counter increment is not thread-safe, but we don't really care.
- // TODO(zhengxq): we should implement a LOG_FIRST_N and LOG_EVERY_N for
- // more general usage.
- static int log_counter = 0;
- if (log_counter < 10) {
- log_counter++;
- LOG(WARNING)
- << "Ran out of memory trying to allocate "
- << strings::HumanReadableNumBytes(num_bytes)
- << ". The caller indicates that this is not a failure, but"
- << " may mean that there could be performance gains if more"
- << " memory is available.";
- }
- }
- return result;
- } else {
- return AllocateRaw(unused_alignment, num_bytes);
- }
-}
-
-// static
-size_t GPUBFCAllocator::RoundedBytes(size_t bytes) {
- size_t rounded_bytes =
- (kMinAllocationSize *
- ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
- DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
- return rounded_bytes;
-}
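
A worked example of the rounding above, assuming kMinAllocationSize == 256 (an assumption consistent with the 256-byte smallest bin; the constant itself is defined in the header):

// Editorial sketch of RoundedBytes with an assumed 256-byte minimum.
#include <cassert>
#include <cstddef>

static size_t RoundedBytesSketch(size_t bytes) {
  constexpr size_t kMin = 256;  // assumed value of kMinAllocationSize
  return kMin * ((bytes + kMin - 1) / kMin);
}

int main() {
  assert(RoundedBytesSketch(1) == 256);    // (1 + 255) / 256 == 1 unit
  assert(RoundedBytesSketch(256) == 256);  // already a multiple
  assert(RoundedBytesSketch(257) == 512);  // spills into a second unit
  return 0;
}
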
-
-void* GPUBFCAllocator::AllocateRawInternal(size_t unused_alignment,
- size_t num_bytes,
- bool dump_log_on_failure) {
- if (num_bytes == 0) {
- LOG(ERROR) << "tried to allocate 0 bytes";
- return nullptr;
- }
- // First, always allocate memory of at least kMinAllocationSize
- // bytes, and always allocate multiples of kMinAllocationSize bytes
- // so all memory addresses are nicely byte aligned.
- size_t rounded_bytes = RoundedBytes(num_bytes);
-
- // The BFC allocator tries to find the best fit first.
- BinNum bin_num = BinNumForSize(rounded_bytes);
-
- mutex_lock l(lock_);
- void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
- if (ptr != nullptr) {
- return ptr;
- }
-
- // Try to extend
- if (Extend(rounded_bytes)) {
- ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
- if (ptr != nullptr) {
- return ptr;
- }
- }
-
- // We searched all bins for an existing free chunk to use and
-  // couldn't find one. This means we must have run out of memory.
-  // Dump the memory log for analysis.
- if (dump_log_on_failure) {
- DumpMemoryLog(rounded_bytes);
- LOG(WARNING) << RenderOccupancy();
- LOG(WARNING) << "Ran out of memory trying to allocate "
- << strings::HumanReadableNumBytes(num_bytes)
- << ". See logs for memory state.";
- }
- return nullptr;
-}
-
-void* GPUBFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
- size_t num_bytes) {
- // First identify the first bin that could satisfy rounded_bytes.
- for (; bin_num < kNumBins; bin_num++) {
- // Start searching from the first bin for the smallest chunk that fits
- // rounded_bytes.
- Bin* b = BinFromIndex(bin_num);
- for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
- ++citer) {
- const GPUBFCAllocator::ChunkHandle h = (*citer);
- GPUBFCAllocator::Chunk* chunk = ChunkFromHandle(h);
- DCHECK(!chunk->in_use());
- if (chunk->size >= rounded_bytes) {
- // We found an existing chunk that fits us that wasn't in use, so remove
- // it from the free bin structure prior to using.
- RemoveFreeChunkIterFromBin(&b->free_chunks, citer);
-
- // If we can break the size of the chunk into two reasonably
- // large pieces, do so.
- //
- // TODO(vrv): What should be the criteria when deciding when
- // to split?
- if (chunk->size >= rounded_bytes * 2) {
- SplitChunk(h, rounded_bytes);
- chunk = ChunkFromHandle(h); // Update chunk pointer in case it moved
- }
-
- // The requested size of the returned chunk is what the user
- // has allocated.
- chunk->requested_size = num_bytes;
- // Assign a unique id and increment the id counter, marking the
- // chunk as being in use.
- chunk->allocation_id = next_allocation_id_++;
-
- // Update stats.
- ++stats_.num_allocs;
- stats_.bytes_in_use += chunk->size;
- stats_.max_bytes_in_use =
- std::max(stats_.max_bytes_in_use, stats_.bytes_in_use);
- stats_.max_alloc_size =
- std::max<std::size_t>(stats_.max_alloc_size, chunk->size);
-
- VLOG(4) << "Returning: " << chunk->ptr;
- if (VLOG_IS_ON(4)) {
- LOG(INFO) << "A: " << RenderOccupancy();
- }
- return chunk->ptr;
- }
- }
- }
-
- return nullptr;
-}
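
For the loop above to honor the "smallest chunk that fits" comment, each bin's free_chunks set must iterate in ascending chunk-size order, so the first element with chunk->size >= rounded_bytes is the best fit in that bin. A plausible comparator, an editorial assumption since the real FreeChunkSet ordering is defined in the header:

// Editorial sketch: order handles by chunk size, breaking ties by address
// so several free chunks of the same size can coexist in one std::set.
// A real set would be constructed with a comparator bound to the chunk table.
#include <cstddef>
#include <set>
#include <vector>

struct ChunkInfoSketch {
  size_t size;
  void* ptr;
};

struct BySizeThenAddress {
  const std::vector<ChunkInfoSketch>* chunks;
  bool operator()(size_t a, size_t b) const {
    const ChunkInfoSketch& ca = (*chunks)[a];
    const ChunkInfoSketch& cb = (*chunks)[b];
    if (ca.size != cb.size) return ca.size < cb.size;
    return ca.ptr < cb.ptr;
  }
};

using FreeChunkSetSketch = std::set<size_t, BySizeThenAddress>;
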
-
-void GPUBFCAllocator::SplitChunk(GPUBFCAllocator::ChunkHandle h,
- size_t num_bytes) {
- // Allocate the new chunk before we do any ChunkFromHandle
- ChunkHandle h_new_chunk = AllocateChunk();
-
- Chunk* c = ChunkFromHandle(h);
- CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
-
- // Create a new chunk starting num_bytes after c
- GPUBFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
- new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
- region_manager_.set_handle(new_chunk->ptr, h_new_chunk);
-
- // Set the new sizes of the chunks.
- new_chunk->size = c->size - num_bytes;
- c->size = num_bytes;
-
- // The new chunk is not in use.
- new_chunk->allocation_id = -1;
-
- // Maintain the pointers.
- // c <-> c_neighbor becomes
- // c <-> new_chunk <-> c_neighbor
- GPUBFCAllocator::ChunkHandle h_neighbor = c->next;
- new_chunk->prev = h;
- new_chunk->next = h_neighbor;
- c->next = h_new_chunk;
- if (h_neighbor != kInvalidChunkHandle) {
- Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
- c_neighbor->prev = h_new_chunk;
- }
-
- // Add the newly free chunk to the free bin.
- InsertFreeChunkIntoBin(h_new_chunk);
-}
-
-void GPUBFCAllocator::DeallocateRaw(void* ptr) {
- DeallocateRawInternal(ptr);
- retry_helper_.NotifyDealloc();
-}
-
-void GPUBFCAllocator::DeallocateRawInternal(void* ptr) {
- if (ptr == nullptr) {
- LOG(ERROR) << "tried to deallocate nullptr";
- return;
- }
- mutex_lock l(lock_);
-
- // Find the chunk from the ptr.
- GPUBFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
- CHECK(h != kInvalidChunkHandle);
-
- // Consider coalescing it.
- FreeAndMaybeCoalesce(h);
-
- if (VLOG_IS_ON(4)) {
- LOG(INFO) << "F: " << RenderOccupancy();
- }
-}
-
-// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
-// We merge Chunk(h2) into Chunk(h1).
-void GPUBFCAllocator::Merge(GPUBFCAllocator::ChunkHandle h1,
- GPUBFCAllocator::ChunkHandle h2) {
- Chunk* c1 = ChunkFromHandle(h1);
- Chunk* c2 = ChunkFromHandle(h2);
- // We can only merge chunks that are not in use.
- CHECK(!c1->in_use() && !c2->in_use());
-
- // c1's prev doesn't change, still points to the same ptr, and is
- // still not in use.
-
- // Fix up neighbor pointers
- //
- // c1 <-> c2 <-> c3 should become
- // c1 <-> c3
-
- GPUBFCAllocator::ChunkHandle h3 = c2->next;
- c1->next = h3;
- CHECK(c2->prev == h1);
- if (h3 != kInvalidChunkHandle) {
- GPUBFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
- c3->prev = h1;
- }
-
- // Set the new size
- c1->size += c2->size;
-
- DeleteChunk(h2);
-}
-
-void GPUBFCAllocator::DeleteChunk(ChunkHandle h) {
- // Delete h and cleanup all state
- Chunk* c = ChunkFromHandle(h);
- // VLOG(4) << "Removing: " << c->ptr;
- region_manager_.erase(c->ptr);
- DeallocateChunk(h);
-}
-
-void GPUBFCAllocator::InsertFreeChunkIntoBin(GPUBFCAllocator::ChunkHandle h) {
- Chunk* c = ChunkFromHandle(h);
- CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
- BinNum bin_num = BinNumForSize(c->size);
- Bin* new_bin = BinFromIndex(bin_num);
- c->bin_num = bin_num;
- new_bin->free_chunks.insert(h);
-}
-
-void GPUBFCAllocator::RemoveFreeChunkIterFromBin(
- GPUBFCAllocator::Bin::FreeChunkSet* free_chunks,
- const GPUBFCAllocator::Bin::FreeChunkSet::iterator& citer) {
- ChunkHandle h = *citer;
- Chunk* c = ChunkFromHandle(h);
- CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
- free_chunks->erase(citer);
- c->bin_num = kInvalidBinNum;
-}
-
-void GPUBFCAllocator::RemoveFreeChunkFromBin(GPUBFCAllocator::ChunkHandle h) {
- Chunk* c = ChunkFromHandle(h);
- CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
- int count = BinFromIndex(c->bin_num)->free_chunks.erase(h);
- CHECK(count > 0) << "Could not find chunk in bin";
- c->bin_num = kInvalidBinNum;
-}
-
-void GPUBFCAllocator::FreeAndMaybeCoalesce(GPUBFCAllocator::ChunkHandle h) {
- Chunk* c = ChunkFromHandle(h);
- CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));
-
- // Mark the chunk as no longer in use
- c->allocation_id = -1;
-
- // Updates the stats.
- stats_.bytes_in_use -= c->size;
-
- // This chunk is no longer in-use, consider coalescing the chunk
- // with adjacent chunks.
- ChunkHandle chunk_to_reassign = h;
-
- // If the next chunk is free, coalesce the two
- if (c->next != kInvalidChunkHandle) {
- Chunk* cnext = ChunkFromHandle(c->next);
- if (!cnext->in_use()) {
- // VLOG(8) << "Chunk at " << cnext->ptr << " merging with c " <<
- // c->ptr;
-
- chunk_to_reassign = h;
-
- // Deletes c->next
- RemoveFreeChunkFromBin(c->next);
- Merge(h, ChunkFromHandle(h)->next);
- }
- }
-
- // If the previous chunk is free, coalesce the two
- c = ChunkFromHandle(h);
- if (c->prev != kInvalidChunkHandle) {
- Chunk* cprev = ChunkFromHandle(c->prev);
- if (!cprev->in_use()) {
- // VLOG(8) << "Chunk at " << c->ptr << " merging into c->prev "
- // << cprev->ptr;
-
- chunk_to_reassign = c->prev;
-
- // Deletes c
- RemoveFreeChunkFromBin(c->prev);
- Merge(ChunkFromHandle(h)->prev, h);
- c = ChunkFromHandle(h);
- }
- }
-
- InsertFreeChunkIntoBin(chunk_to_reassign);
-}
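
FreeAndMaybeCoalesce above has four possible outcomes depending on whether the freed chunk's neighbors are in use; in each, exactly one surviving handle is re-inserted into a bin. Schematically (editorial illustration, B is the chunk being freed):

  prev used, next used:  [A][B free][C]    reinsert B
  prev used, next free:  [A][B+C free]     merge next into B, reinsert B
  prev free, next used:  [A+B free][C]     merge B into prev, reinsert A
  prev free, next free:  [A+B+C free]      both merges,       reinsert A
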
-
-void GPUBFCAllocator::AddAllocVisitor(Visitor visitor) {
- VLOG(1) << "AddVisitor";
- mutex_lock l(lock_);
- region_visitors_.push_back(visitor);
- for (const auto& region : region_manager_.regions()) {
- visitor(region.ptr(), region.memory_size());
- }
-}
-
-bool GPUBFCAllocator::TracksAllocationSizes() { return true; }
-
-size_t GPUBFCAllocator::RequestedSize(void* ptr) {
- mutex_lock l(lock_);
- GPUBFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
- CHECK(h != kInvalidChunkHandle)
- << "Asked for requested size of pointer we never allocated: " << ptr;
- GPUBFCAllocator::Chunk* c = ChunkFromHandle(h);
- return c->requested_size;
-}
-
-size_t GPUBFCAllocator::AllocatedSize(void* ptr) {
- mutex_lock l(lock_);
- GPUBFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
- CHECK(h != kInvalidChunkHandle)
- << "Asked for allocated size of pointer we never allocated: " << ptr;
- GPUBFCAllocator::Chunk* c = ChunkFromHandle(h);
- return c->size;
-}
-
-int64 GPUBFCAllocator::AllocationId(void* ptr) {
- mutex_lock l(lock_);
- GPUBFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
- CHECK(h != kInvalidChunkHandle)
- << "Asked for allocation id of pointer we never allocated: " << ptr;
- GPUBFCAllocator::Chunk* c = ChunkFromHandle(h);
- return c->allocation_id;
-}
-
-namespace {
-
-void RenderRegion(char* rendered, const size_t resolution,
- const size_t total_render_size, const size_t offset,
- const void* base_ptr, const void* ptr, const size_t size,
- const char c) {
- const char* base_ptr_c = static_cast<const char*>(base_ptr);
- const char* ptr_c = static_cast<const char*>(ptr);
-
- size_t start_location =
- ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
- CHECK_GE(start_location, 0);
- CHECK_LT(start_location, resolution);
- size_t end_location =
- ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
- total_render_size;
- CHECK_GE(end_location, 0);
- CHECK_LT(end_location, resolution);
-
- for (size_t i = start_location; i <= end_location; ++i) {
- rendered[i] = c;
- }
-}
-
-} // namespace
-
-string GPUBFCAllocator::RenderOccupancy() {
- // Make a buffer for the ASCII-art representation.
- const size_t resolution = 100;
- char rendered[resolution];
-
- // Compute the total region size to render over
- size_t total_region_size = 0;
- for (const auto& region : region_manager_.regions()) {
- total_region_size += region.memory_size();
- }
-
- // Start out with everything empty
- RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
- total_region_size, '_');
-
- size_t region_offset = 0;
- for (const auto& region : region_manager_.regions()) {
- ChunkHandle h = region_manager_.get_handle(region.ptr());
- // Then render each chunk left to right.
- while (h != kInvalidChunkHandle) {
- Chunk* c = ChunkFromHandle(h);
- if (c->in_use()) {
- // Render the wasted space
- size_t wasted = c->size - c->requested_size;
- if (wasted > 0) {
- RenderRegion(rendered, resolution, total_region_size,
- region_offset + c->requested_size, region.ptr(), c->ptr,
- wasted, 'x');
- }
- // Then the occupied space
- RenderRegion(rendered, resolution, total_region_size, region_offset,
- region.ptr(), c->ptr, c->requested_size, '*');
- }
- h = c->next;
- }
- region_offset += region.memory_size();
- }
-
- return StringPiece(rendered, resolution).ToString();
-}
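
RenderOccupancy compresses all regions into a 100-character strip: '_' marks free bytes, '*' marks client-requested bytes in use, and 'x' marks rounding waste (allocated minus requested). A toy, runnable version for a single region, illustrative of the output shape only:

// Editorial sketch of the occupancy rendering for one region.
#include <cstddef>
#include <cstdio>
#include <string>

int main() {
  const size_t resolution = 40;
  const size_t region = 4096;
  std::string rendered(resolution, '_');  // start out with everything empty

  // Paint [offset, offset + len) scaled into the strip, as RenderRegion does.
  auto mark = [&](size_t offset, size_t len, char c) {
    size_t start = offset * resolution / region;
    size_t end = (offset + len - 1) * resolution / region;
    for (size_t i = start; i <= end; ++i) rendered[i] = c;
  };

  // One in-use chunk: 1000 bytes requested, rounded up to a 1536-byte chunk.
  mark(1000, 536, 'x');  // rounding waste, drawn at offset + requested_size
  mark(0, 1000, '*');    // client-requested bytes
  std::printf("%s\n", rendered.c_str());  // prints **********xxxxx_____...
  return 0;
}
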
-
-void GPUBFCAllocator::DumpMemoryLog(size_t num_bytes) {
- // For each bin: tally up the total number of chunks and bytes.
- // Note that bins hold only free chunks.
- for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
- Bin* b = BinFromIndex(bin_num);
-
- size_t total_bytes_in_use = 0;
- size_t total_bytes_in_bin = 0;
- size_t total_requested_bytes_in_use = 0;
- size_t total_requested_bytes_in_bin = 0;
- size_t total_chunks_in_use = 0;
- size_t total_chunks_in_bin = 0;
- for (ChunkHandle h : b->free_chunks) {
- Chunk* c = ChunkFromHandle(h);
- total_bytes_in_bin += c->size;
- total_requested_bytes_in_bin += c->requested_size;
- ++total_chunks_in_bin;
- if (c->in_use()) {
- total_bytes_in_use += c->size;
- total_requested_bytes_in_use += c->requested_size;
- ++total_chunks_in_use;
- }
- }
-
- LOG(INFO) << "Bin (" << b->bin_size
- << "): \tTotal Chunks: " << total_chunks_in_bin
- << ", Chunks in use: " << total_chunks_in_use << " "
- << strings::HumanReadableNumBytes(total_bytes_in_bin)
- << " allocated for chunks. "
- << strings::HumanReadableNumBytes(total_requested_bytes_in_bin)
- << " client-requested for chunks. "
- << strings::HumanReadableNumBytes(total_bytes_in_use)
- << " in use in bin. "
- << strings::HumanReadableNumBytes(total_requested_bytes_in_use)
- << " client-requested in use in bin.";
- }
-
- // Find the bin that we would have liked to allocate in, so we
- // can get some further analysis about fragmentation.
- Bin* b = BinForSize(num_bytes);
-
- LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
- << " was " << strings::HumanReadableNumBytes(b->bin_size)
- << ", Chunk State: ";
-
- for (ChunkHandle h : b->free_chunks) {
- Chunk* c = ChunkFromHandle(h);
- LOG(INFO) << c->DebugString(this, true);
- }
-
- // Next show the chunks that are in use, and also summarize their
- // number by size.
- std::map<size_t, int> in_use_by_size;
- for (const auto& region : region_manager_.regions()) {
- ChunkHandle h = region_manager_.get_handle(region.ptr());
- while (h != kInvalidChunkHandle) {
- const Chunk* c = ChunkFromHandle(h);
- if (c->in_use()) {
- in_use_by_size[c->size]++;
- LOG(INFO) << "Chunk at " << c->ptr << " of size " << c->size;
- }
- h = c->next;
- }
-
- h = region_manager_.get_handle(region.ptr());
- while (h != kInvalidChunkHandle) {
- const Chunk* c = ChunkFromHandle(h);
- if (!c->in_use()) {
- LOG(INFO) << "Free at " << c->ptr << " of size " << c->size;
- }
- h = c->next;
- }
- }
-
- LOG(INFO) << " Summary of in-use Chunks by size: ";
- size_t total_bytes = 0;
- for (auto& it : in_use_by_size) {
- LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
- << strings::HumanReadableNumBytes(it.first * it.second);
- total_bytes += (it.first * it.second);
- }
- LOG(INFO) << "Sum Total of in-use chunks: "
- << strings::HumanReadableNumBytes(total_bytes);
- LOG(INFO) << "Stats: \n" << stats_.DebugString();
-}
-
-void GPUBFCAllocator::GetStats(AllocatorStats* stats) {
- mutex_lock l(lock_);
- *stats = stats_;
-}
+ : BFCAllocator(
+ new GPUMemAllocator(
+ GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie()),
+ total_memory, gpu_options.allow_growth(), "gpu_bfc") {}
} // namespace tensorflow
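
What remains above is the entire file: construction now delegates to the GPU-neutral BFCAllocator with a GPUMemAllocator back end. The pinned-memory half of the commit title lives in other files not shown here, but its shape is presumably symmetric. A hedged sketch, built directly on the CUDA runtime for illustration only; the real code would go through StreamExecutor, and none of these names come from this patch:

// Editorial sketch: a host-side back end so CUDA pinned (page-locked)
// memory can be managed by the same GPU-neutral BFCAllocator.
#include <cstddef>
#include <cuda_runtime.h>

class PinnedHostSubAllocatorSketch {
 public:
  void* Alloc(size_t /*alignment*/, size_t num_bytes) {
    void* ptr = nullptr;
    if (cudaHostAlloc(&ptr, num_bytes, cudaHostAllocDefault) != cudaSuccess) {
      return nullptr;  // let the BFC layer backpedal or fail the request
    }
    return ptr;
  }
  void Free(void* ptr, size_t /*num_bytes*/) { cudaFreeHost(ptr); }
};
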