diff options
author | 2018-09-17 17:49:36 -0700 | |
---|---|---|
committer | 2018-09-17 17:53:46 -0700 | |
commit | 185aa89912376d4088c22615908696cd30f9951b (patch) | |
tree | ba465945c19904129b830cc2465a3ee7b58b9247 /tensorflow/core/common_runtime/mkl_cpu_allocator.h | |
parent | 4338803b98cd825b0b1d810bcc51c9a79734feb6 (diff) |
Eliminate VisitableAllocator.
The visitor pattern is used to allow pre-registration of memory for
DMA access, e.g. for fast GPU/CPU i/o and for RDMA networking. The
VisitableAllocator interface was introduced to support this use some
time ago, prior to SubAllocators. Memory registration works best if
it's done infrequently, on large pieces of memory, rather than on
every piece that's dynamically allocated/freed. This usage pattern
fits the SubAllocator better than a general Allocator. This change
moves memory allocation visitor access to SubAllocator and eliminates
the VisitableAllocator subclass of Allocator.
This change also more rigorously enforces the requirement that all
Visitors be declared prior to memory allocation beginning. This is
accomplished by requiring that Visitors be provided to the SubAllocator
constructor.
This refactoring will ease an upcoming CL introducing
NUMA-specific CPU devices. It also should fix some performance
pitfalls (e.g. accidental use of PoolAllocator) introduced by an
earlier refactoring of ProcessState that was also in preparation for
NUMA. It restores the default use of the cpu_allocator() value (i.e.
no SubAllocator) by model executions that don't use allocation
visitors (since visitor registration must precede the first allocation,
hence can be detected at that time).
PiperOrigin-RevId: 213371553
Diffstat (limited to 'tensorflow/core/common_runtime/mkl_cpu_allocator.h')
-rw-r--r-- | tensorflow/core/common_runtime/mkl_cpu_allocator.h | 50 |
1 files changed, 7 insertions, 43 deletions
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index df9c3a686c..538a70668a 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -23,12 +23,11 @@ limitations under the License. #include <cstdlib> #include "tensorflow/core/common_runtime/bfc_allocator.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" -#include "tensorflow/core/framework/allocator_registry.h" +#include "tensorflow/core/common_runtime/pool_allocator.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" -#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/numa.h" #ifndef INTEL_MKL_DNN_ONLY #include "i_malloc.h" @@ -40,20 +39,16 @@ typedef unsigned int uint; namespace tensorflow { -class MklSubAllocator : public SubAllocator { +class MklSubAllocator : public BasicCPUAllocator { public: + MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {} ~MklSubAllocator() override {} - - void* Alloc(size_t alignment, size_t num_bytes) override { - return port::AlignedMalloc(num_bytes, alignment); - } - void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); } }; // CPU allocator that handles small-size allocations by calling // suballocator directly. Mostly, it is just a wrapper around a suballocator // (that calls malloc and free directly) with support for bookkeeping. -class MklSmallSizeAllocator : public VisitableAllocator { +class MklSmallSizeAllocator : public Allocator { public: MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory, const string& name) @@ -75,10 +70,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { CHECK(map_.insert(map_val).second); // Increment statistics for small-size allocations. IncrementStats(num_bytes); - // Call alloc visitors. 
- for (const auto& visitor : alloc_visitors_) { - visitor(ptr, num_bytes); - } } return ptr; } @@ -94,9 +85,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { if (map_iter != map_.end()) { // Call free visitors. size_t dealloc_bytes = map_iter->second; - for (const auto& visitor : free_visitors_) { - visitor(ptr, dealloc_bytes); - } sub_allocator_->Free(ptr, dealloc_bytes); DecrementStats(dealloc_bytes); map_.erase(map_iter); @@ -121,16 +109,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { stats_.Clear(); } - void AddAllocVisitor(Visitor visitor) override { - mutex_lock l(mutex_); - alloc_visitors_.push_back(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - mutex_lock l(mutex_); - free_visitors_.push_back(visitor); - } - private: // Increment statistics for the allocator handling small allocations. inline void IncrementStats(size_t alloc_size) @@ -163,15 +141,11 @@ class MklSmallSizeAllocator : public VisitableAllocator { // Allocator stats for small allocs AllocatorStats stats_ GUARDED_BY(mutex_); - - // Visitors - std::vector<Visitor> alloc_visitors_ GUARDED_BY(mutex_); - std::vector<Visitor> free_visitors_ GUARDED_BY(mutex_); }; /// CPU allocator for MKL that wraps BFC allocator and intercepts /// and redirects memory allocation calls from MKL. 
-class MklCPUAllocator : public VisitableAllocator { +class MklCPUAllocator : public Allocator { public: // Constructor and other standard functions @@ -284,16 +258,6 @@ class MklCPUAllocator : public VisitableAllocator { large_size_allocator_->ClearStats(); } - void AddAllocVisitor(Visitor visitor) override { - small_size_allocator_->AddAllocVisitor(visitor); - large_size_allocator_->AddAllocVisitor(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - small_size_allocator_->AddFreeVisitor(visitor); - large_size_allocator_->AddFreeVisitor(visitor); - } - private: // Hooks provided by this allocator for memory allocation routines from MKL @@ -330,7 +294,7 @@ class MklCPUAllocator : public VisitableAllocator { // The alignment that we need for the allocations static constexpr const size_t kAlignment = 64; - VisitableAllocator* large_size_allocator_; // owned by this class + Allocator* large_size_allocator_; // owned by this class MklSmallSizeAllocator* small_size_allocator_; // owned by this class. SubAllocator* sub_allocator_; // not owned by this class |