about summary refs log tree commit diff homepage
path: root/tensorflow/core/common_runtime/mkl_cpu_allocator.h
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-09-17 17:49:36 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-09-17 17:53:46 -0700
commit185aa89912376d4088c22615908696cd30f9951b (patch)
treeba465945c19904129b830cc2465a3ee7b58b9247 /tensorflow/core/common_runtime/mkl_cpu_allocator.h
parent4338803b98cd825b0b1d810bcc51c9a79734feb6 (diff)
Eliminate VisitableAllocator.
The visitor pattern is used to allow pre-registration of memory for DMA access, e.g. for fast GPU/CPU i/o and for RDMA networking. The VisitableAllocator interface was introduced to support this use some time ago, prior to SubAllocators. Memory registration works best if it's done infrequently, on large pieces of memory, rather than on every piece that's dynamically allocated/freed. This usage pattern fits the SubAllocator better than a general Allocator. This change moves memory allocation visitor access to SubAllocator and eliminates the VisitableAllocator subclass of Allocator. This change also more rigorously enforces the requirement that all Visitors be declared prior to memory allocation beginning. This is accomplished by requiring that Visitors be provided to the SubAllocator constructor. This refactoring will ease an upcoming CL introducing NUMA specific CPU devices. It also should fix some performance pitfalls (e.g. accidental use of PoolAllocator) introduced by an earlier refactoring of ProcessState that was also in preparation for NUMA. It restores the default use of the cpu_allocator() value (i.e. no SubAllocator) by model executions that don't use allocation visitors (since visitor registration must precede the first allocation, hence can be detected at that time). PiperOrigin-RevId: 213371553
Diffstat (limited to 'tensorflow/core/common_runtime/mkl_cpu_allocator.h')
-rw-r--r--tensorflow/core/common_runtime/mkl_cpu_allocator.h50
1 files changed, 7 insertions, 43 deletions
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index df9c3a686c..538a70668a 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -23,12 +23,11 @@ limitations under the License.
#include <cstdlib>
#include "tensorflow/core/common_runtime/bfc_allocator.h"
-#include "tensorflow/core/common_runtime/visitable_allocator.h"
-#include "tensorflow/core/framework/allocator_registry.h"
+#include "tensorflow/core/common_runtime/pool_allocator.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/numa.h"
#ifndef INTEL_MKL_DNN_ONLY
#include "i_malloc.h"
@@ -40,20 +39,16 @@ typedef unsigned int uint;
namespace tensorflow {
-class MklSubAllocator : public SubAllocator {
+class MklSubAllocator : public BasicCPUAllocator {
public:
+ MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {}
~MklSubAllocator() override {}
-
- void* Alloc(size_t alignment, size_t num_bytes) override {
- return port::AlignedMalloc(num_bytes, alignment);
- }
- void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); }
};
// CPU allocator that handles small-size allocations by calling
// suballocator directly. Mostly, it is just a wrapper around a suballocator
// (that calls malloc and free directly) with support for bookkeeping.
-class MklSmallSizeAllocator : public VisitableAllocator {
+class MklSmallSizeAllocator : public Allocator {
public:
MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory,
const string& name)
@@ -75,10 +70,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
CHECK(map_.insert(map_val).second);
// Increment statistics for small-size allocations.
IncrementStats(num_bytes);
- // Call alloc visitors.
- for (const auto& visitor : alloc_visitors_) {
- visitor(ptr, num_bytes);
- }
}
return ptr;
}
@@ -94,9 +85,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
if (map_iter != map_.end()) {
// Call free visitors.
size_t dealloc_bytes = map_iter->second;
- for (const auto& visitor : free_visitors_) {
- visitor(ptr, dealloc_bytes);
- }
sub_allocator_->Free(ptr, dealloc_bytes);
DecrementStats(dealloc_bytes);
map_.erase(map_iter);
@@ -121,16 +109,6 @@ class MklSmallSizeAllocator : public VisitableAllocator {
stats_.Clear();
}
- void AddAllocVisitor(Visitor visitor) override {
- mutex_lock l(mutex_);
- alloc_visitors_.push_back(visitor);
- }
-
- void AddFreeVisitor(Visitor visitor) override {
- mutex_lock l(mutex_);
- free_visitors_.push_back(visitor);
- }
-
private:
// Increment statistics for the allocator handling small allocations.
inline void IncrementStats(size_t alloc_size)
@@ -163,15 +141,11 @@ class MklSmallSizeAllocator : public VisitableAllocator {
// Allocator stats for small allocs
AllocatorStats stats_ GUARDED_BY(mutex_);
-
- // Visitors
- std::vector<Visitor> alloc_visitors_ GUARDED_BY(mutex_);
- std::vector<Visitor> free_visitors_ GUARDED_BY(mutex_);
};
/// CPU allocator for MKL that wraps BFC allocator and intercepts
/// and redirects memory allocation calls from MKL.
-class MklCPUAllocator : public VisitableAllocator {
+class MklCPUAllocator : public Allocator {
public:
// Constructor and other standard functions
@@ -284,16 +258,6 @@ class MklCPUAllocator : public VisitableAllocator {
large_size_allocator_->ClearStats();
}
- void AddAllocVisitor(Visitor visitor) override {
- small_size_allocator_->AddAllocVisitor(visitor);
- large_size_allocator_->AddAllocVisitor(visitor);
- }
-
- void AddFreeVisitor(Visitor visitor) override {
- small_size_allocator_->AddFreeVisitor(visitor);
- large_size_allocator_->AddFreeVisitor(visitor);
- }
-
private:
// Hooks provided by this allocator for memory allocation routines from MKL
@@ -330,7 +294,7 @@ class MklCPUAllocator : public VisitableAllocator {
// The alignment that we need for the allocations
static constexpr const size_t kAlignment = 64;
- VisitableAllocator* large_size_allocator_; // owned by this class
+ Allocator* large_size_allocator_; // owned by this class
MklSmallSizeAllocator* small_size_allocator_; // owned by this class.
SubAllocator* sub_allocator_; // not owned by this class