diff options
author | 2018-09-28 19:49:23 -0700 | |
---|---|---|
committer | 2018-09-28 19:49:23 -0700 | |
commit | d936d819752916d3122f02def571ecac9e995029 (patch) | |
tree | 5b86fe5b82d0ddea4569afe36fb4dbd668b736da /tensorflow/core/common_runtime | |
parent | a287961cffcb9ae1a0675f4e18d14674dfae130a (diff) |
Lower the MKLCpuAllocator priority so that the default allocator can be used when MKL is disabled, along with some minor changes
Diffstat (limited to 'tensorflow/core/common_runtime')
-rw-r--r-- | tensorflow/core/common_runtime/mkl_cpu_allocator.h | 54 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/process_util.cc | 37 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/threadpool_device.cc | 4 |
3 files changed, 36 insertions(+), 59 deletions(-)
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 516138d28d..429b19599b 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" -#include "tensorflow/core/util/util.h" #include "tensorflow/core/platform/numa.h" #ifndef INTEL_MKL_DNN_ONLY @@ -164,12 +163,6 @@ class MklCPUAllocator : public Allocator { } Status Initialize() { - if (DisableMKL()) { - VLOG(1) << "TF-MKL: Disabling pool allocator"; - tf_disable_pool_allocator_flag_ = true; - return Status::OK(); - } - VLOG(2) << "MklCPUAllocator: In MklCPUAllocator"; // Set upper bound on memory allocation to physical RAM available on the @@ -224,10 +217,6 @@ class MklCPUAllocator : public Allocator { inline string Name() override { return kName; } inline void* AllocateRaw(size_t alignment, size_t num_bytes) override { - if (tf_disable_pool_allocator_flag_) { - return port::AlignedMalloc(num_bytes, alignment); - } - // If the allocation size is less than threshold, call small allocator, // otherwise call large-size allocator (BFC). We found that BFC allocator // does not deliver good performance for small allocations when @@ -238,10 +227,6 @@ class MklCPUAllocator : public Allocator { } inline void DeallocateRaw(void* ptr) override { - if (tf_disable_pool_allocator_flag_) { - port::AlignedFree(ptr); - return; - } // Check if ptr is for "small" allocation. If it is, then call Free // directly. Otherwise, call BFC to handle free. 
if (small_size_allocator_->IsSmallSizeAllocation(ptr)) { @@ -252,30 +237,26 @@ class MklCPUAllocator : public Allocator { } void GetStats(AllocatorStats* stats) override { - if (!tf_disable_pool_allocator_flag_) { - AllocatorStats l_stats, s_stats; - small_size_allocator_->GetStats(&s_stats); - large_size_allocator_->GetStats(&l_stats); - - // Combine statistics from small-size and large-size allocator. - stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs; - stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use; - stats->max_bytes_in_use = - l_stats.max_bytes_in_use + s_stats.max_bytes_in_use; - - // Since small-size allocations go to MklSmallSizeAllocator, - // max_alloc_size from large_size_allocator would be the maximum - // size allocated by MklCPUAllocator. - stats->max_alloc_size = l_stats.max_alloc_size; - stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit); - } + AllocatorStats l_stats, s_stats; + small_size_allocator_->GetStats(&s_stats); + large_size_allocator_->GetStats(&l_stats); + + // Combine statistics from small-size and large-size allocator. + stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs; + stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use; + stats->max_bytes_in_use = + l_stats.max_bytes_in_use + s_stats.max_bytes_in_use; + + // Since small-size allocations go to MklSmallSizeAllocator, + // max_alloc_size from large_size_allocator would be the maximum + // size allocated by MklCPUAllocator. 
+ stats->max_alloc_size = l_stats.max_alloc_size; + stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit); } void ClearStats() override { - if (!tf_disable_pool_allocator_flag_) { - small_size_allocator_->ClearStats(); - large_size_allocator_->ClearStats(); - } + small_size_allocator_->ClearStats(); + large_size_allocator_->ClearStats(); } private: @@ -314,7 +295,6 @@ class MklCPUAllocator : public Allocator { // The alignment that we need for the allocations static constexpr const size_t kAlignment = 64; - bool tf_disable_pool_allocator_flag_ = false; Allocator* large_size_allocator_; // owned by this class MklSmallSizeAllocator* small_size_allocator_; // owned by this class. diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc index 60fa601907..b3064a4c08 100644 --- a/tensorflow/core/common_runtime/process_util.cc +++ b/tensorflow/core/common_runtime/process_util.cc @@ -57,28 +57,25 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { const int32 inter_op = options.config.inter_op_parallelism_threads(); if (inter_op != 0) return inter_op; #ifdef INTEL_MKL - // Early return if MKL is disabled - if (DisableMKL()) - return port::NumSchedulableCPUs(); - - // MKL library executes ops in parallel using OMP threads - // Set inter_op conservatively to avoid thread oversubscription that could - // lead to severe perf degradations and OMP resource exhaustion - int mkl_intra_op = 1; -#ifdef _OPENMP - mkl_intra_op = omp_get_max_threads(); -#endif // _OPENMP - CHECK_GE(mkl_intra_op, 1); - const int32 mkl_inter_op = std::max( - (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2); - VLOG(0) << "Creating new thread pool with default inter op setting: " - << mkl_inter_op - << ". 
Tune using inter_op_parallelism_threads for best performance."; - return mkl_inter_op; -#else + if (!DisableMKL()) { + // MKL library executes ops in parallel using OMP threads + // Set inter_op conservatively to avoid thread oversubscription that could + // lead to severe perf degradations and OMP resource exhaustion + int mkl_intra_op = 1; + #ifdef _OPENMP + mkl_intra_op = omp_get_max_threads(); + #endif // _OPENMP + CHECK_GE(mkl_intra_op, 1); + const int32 mkl_inter_op = std::max( + (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2); + VLOG(0) << "Creating new thread pool with default inter op setting: " + << mkl_inter_op + << ". Tune using inter_op_parallelism_threads for best performance."; + return mkl_inter_op; + } +#endif // INTEL_MKL // Default to using the number of cores available in the process. return port::NumSchedulableCPUs(); -#endif // INTEL_MKL } thread::ThreadPool* NewThreadPoolFromSessionOptions( diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index 29c01d7f72..f188016610 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -50,7 +50,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options, allocator_(allocator), scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) { #ifdef INTEL_MKL - // Eearly return when MKL is disabled + // Early return when MKL is disabled if (DisableMKL()) return; #ifdef _OPENMP @@ -118,7 +118,7 @@ class MklCPUAllocatorFactory : public AllocatorFactory { }; #ifdef ENABLE_MKL -REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory); +REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory); #endif // ENABLE_MKL } // namespace |