Add the ability to disable TensorFlow's MKL support by setting the environment variable TF_DISABLE_MKL=1

author: Xiaoming (Jason) Cui <xiaoming.cui@intel.com> 2018-09-25 00:42:42 -0700
committer: Xiaoming (Jason) Cui <xiaoming.cui@intel.com> 2018-09-28 12:23:18 -0700
commit: b5feceb9058e06eac3de86ec45c44f5637054855 (patch)
tree: 08d828f1033dacec9d476a3b9a5b88c3af13622d /tensorflow/core/common_runtime
parent: 986193d79e00f1780fb3278ed890a72f7285f66e (diff)
3 files changed, 46 insertions, 17 deletions
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 429b19599b..516138d28d 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
+#include "tensorflow/core/util/util.h"
 #include "tensorflow/core/platform/numa.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
@@ -163,6 +164,12 @@ class MklCPUAllocator : public Allocator {
   }
 
   Status Initialize() {
+    if (DisableMKL()) {
+        VLOG(1) << "TF-MKL: Disabling pool allocator";
+        tf_disable_pool_allocator_flag_ = true;
+        return Status::OK();
+    }
+
     VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
 
     // Set upper bound on memory allocation to physical RAM available on the
@@ -217,6 +224,10 @@ class MklCPUAllocator : public Allocator {
   inline string Name() override { return kName; }
 
   inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
+    if (tf_disable_pool_allocator_flag_) {
+      return port::AlignedMalloc(num_bytes, alignment);
+    }
+
     // If the allocation size is less than threshold, call small allocator,
     // otherwise call large-size allocator (BFC). We found that BFC allocator
     // does not deliver good performance for small allocations when
@@ -227,6 +238,10 @@ class MklCPUAllocator : public Allocator {
   }
 
   inline void DeallocateRaw(void* ptr) override {
+    if (tf_disable_pool_allocator_flag_) {
+      port::AlignedFree(ptr);
+      return;
+    }
     // Check if ptr is for "small" allocation. If it is, then call Free
     // directly. Otherwise, call BFC to handle free.
     if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
@@ -237,26 +252,30 @@ class MklCPUAllocator : public Allocator {
   }
 
   void GetStats(AllocatorStats* stats) override {
-    AllocatorStats l_stats, s_stats;
-    small_size_allocator_->GetStats(&s_stats);
-    large_size_allocator_->GetStats(&l_stats);
-
-    // Combine statistics from small-size and large-size allocator.
-    stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
-    stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
-    stats->max_bytes_in_use =
-        l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
-
-    // Since small-size allocations go to MklSmallSizeAllocator,
-    // max_alloc_size from large_size_allocator would be the maximum
-    // size allocated by MklCPUAllocator.
-    stats->max_alloc_size = l_stats.max_alloc_size;
-    stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+    if (!tf_disable_pool_allocator_flag_) {
+      AllocatorStats l_stats, s_stats;
+      small_size_allocator_->GetStats(&s_stats);
+      large_size_allocator_->GetStats(&l_stats);
+
+      // Combine statistics from small-size and large-size allocator.
+      stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
+      stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
+      stats->max_bytes_in_use =
+          l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
+
+      // Since small-size allocations go to MklSmallSizeAllocator,
+      // max_alloc_size from large_size_allocator would be the maximum
+      // size allocated by MklCPUAllocator.
+      stats->max_alloc_size = l_stats.max_alloc_size;
+      stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+    }
   }
 
   void ClearStats() override {
-    small_size_allocator_->ClearStats();
-    large_size_allocator_->ClearStats();
+    if (!tf_disable_pool_allocator_flag_) {
+      small_size_allocator_->ClearStats();
+      large_size_allocator_->ClearStats();
+    }
   }
 
  private:
@@ -295,6 +314,7 @@ class MklCPUAllocator : public Allocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
+  bool tf_disable_pool_allocator_flag_ = false;
   Allocator* large_size_allocator_;              // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index a5d31b75c7..60fa601907 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/util.h"
 
 namespace tensorflow {
 
@@ -56,6 +57,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
+  // Early return if MKL is disabled
+  if (DisableMKL())
+    return port::NumSchedulableCPUs();
+
   // MKL library executes ops in parallel using OMP threads
   // Set inter_op conservatively to avoid thread oversubscription that could
   // lead to severe perf degradations and OMP resource exhaustion
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 8587d1783a..29c01d7f72 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/util.h"
 
 #ifdef INTEL_MKL
 #ifdef _OPENMP
@@ -49,6 +50,9 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
+  // Early return when MKL is disabled
+  if (DisableMKL())
+    return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
author	Xiaoming (Jason) Cui <xiaoming.cui@intel.com>	2018-09-25 00:42:42 -0700
committer	Xiaoming (Jason) Cui <xiaoming.cui@intel.com>	2018-09-28 12:23:18 -0700
commit	b5feceb9058e06eac3de86ec45c44f5637054855 (patch)
tree	08d828f1033dacec9d476a3b9a5b88c3af13622d /tensorflow/core/common_runtime
parent	986193d79e00f1780fb3278ed890a72f7285f66e (diff)