diff options
author | Xiaoming (Jason) Cui <xiaoming.cui@intel.com> | 2018-09-25 00:42:42 -0700 |
---|---|---|
committer | Xiaoming (Jason) Cui <xiaoming.cui@intel.com> | 2018-09-28 12:23:18 -0700 |
commit | b5feceb9058e06eac3de86ec45c44f5637054855 (patch) | |
tree | 08d828f1033dacec9d476a3b9a5b88c3af13622d /tensorflow/core/common_runtime | |
parent | 986193d79e00f1780fb3278ed890a72f7285f66e (diff) |
Added a feature to disable TensorFlow's MKL support by setting the environment variable TF_DISABLE_MKL=1
Diffstat (limited to 'tensorflow/core/common_runtime')
-rw-r--r-- | tensorflow/core/common_runtime/mkl_cpu_allocator.h | 54 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/process_util.cc | 5 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/threadpool_device.cc | 4 |
3 files changed, 46 insertions, 17 deletions
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 429b19599b..516138d28d 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" +#include "tensorflow/core/util/util.h" #include "tensorflow/core/platform/numa.h" #ifndef INTEL_MKL_DNN_ONLY @@ -163,6 +164,12 @@ class MklCPUAllocator : public Allocator { } Status Initialize() { + if (DisableMKL()) { + VLOG(1) << "TF-MKL: Disabling pool allocator"; + tf_disable_pool_allocator_flag_ = true; + return Status::OK(); + } + VLOG(2) << "MklCPUAllocator: In MklCPUAllocator"; // Set upper bound on memory allocation to physical RAM available on the @@ -217,6 +224,10 @@ class MklCPUAllocator : public Allocator { inline string Name() override { return kName; } inline void* AllocateRaw(size_t alignment, size_t num_bytes) override { + if (tf_disable_pool_allocator_flag_) { + return port::AlignedMalloc(num_bytes, alignment); + } + // If the allocation size is less than threshold, call small allocator, // otherwise call large-size allocator (BFC). We found that BFC allocator // does not deliver good performance for small allocations when @@ -227,6 +238,10 @@ class MklCPUAllocator : public Allocator { } inline void DeallocateRaw(void* ptr) override { + if (tf_disable_pool_allocator_flag_) { + port::AlignedFree(ptr); + return; + } // Check if ptr is for "small" allocation. If it is, then call Free // directly. Otherwise, call BFC to handle free. 
if (small_size_allocator_->IsSmallSizeAllocation(ptr)) { @@ -237,26 +252,30 @@ class MklCPUAllocator : public Allocator { } void GetStats(AllocatorStats* stats) override { - AllocatorStats l_stats, s_stats; - small_size_allocator_->GetStats(&s_stats); - large_size_allocator_->GetStats(&l_stats); - - // Combine statistics from small-size and large-size allocator. - stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs; - stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use; - stats->max_bytes_in_use = - l_stats.max_bytes_in_use + s_stats.max_bytes_in_use; - - // Since small-size allocations go to MklSmallSizeAllocator, - // max_alloc_size from large_size_allocator would be the maximum - // size allocated by MklCPUAllocator. - stats->max_alloc_size = l_stats.max_alloc_size; - stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit); + if (!tf_disable_pool_allocator_flag_) { + AllocatorStats l_stats, s_stats; + small_size_allocator_->GetStats(&s_stats); + large_size_allocator_->GetStats(&l_stats); + + // Combine statistics from small-size and large-size allocator. + stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs; + stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use; + stats->max_bytes_in_use = + l_stats.max_bytes_in_use + s_stats.max_bytes_in_use; + + // Since small-size allocations go to MklSmallSizeAllocator, + // max_alloc_size from large_size_allocator would be the maximum + // size allocated by MklCPUAllocator. 
+ stats->max_alloc_size = l_stats.max_alloc_size; + stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit); + } } void ClearStats() override { - small_size_allocator_->ClearStats(); - large_size_allocator_->ClearStats(); + if (!tf_disable_pool_allocator_flag_) { + small_size_allocator_->ClearStats(); + large_size_allocator_->ClearStats(); + } } private: @@ -295,6 +314,7 @@ class MklCPUAllocator : public Allocator { // The alignment that we need for the allocations static constexpr const size_t kAlignment = 64; + bool tf_disable_pool_allocator_flag_ = false; Allocator* large_size_allocator_; // owned by this class MklSmallSizeAllocator* small_size_allocator_; // owned by this class. diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc index a5d31b75c7..60fa601907 100644 --- a/tensorflow/core/common_runtime/process_util.cc +++ b/tensorflow/core/common_runtime/process_util.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/util.h" namespace tensorflow { @@ -56,6 +57,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { const int32 inter_op = options.config.inter_op_parallelism_threads(); if (inter_op != 0) return inter_op; #ifdef INTEL_MKL + // Early return if MKL is disabled + if (DisableMKL()) + return port::NumSchedulableCPUs(); + // MKL library executes ops in parallel using OMP threads // Set inter_op conservatively to avoid thread oversubscription that could // lead to severe perf degradations and OMP resource exhaustion diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index 8587d1783a..29c01d7f72 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/util/util.h" #ifdef INTEL_MKL #ifdef _OPENMP @@ -49,6 +50,9 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options, allocator_(allocator), scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) { #ifdef INTEL_MKL + // Early return when MKL is disabled + if (DisableMKL()) + return; #ifdef _OPENMP const char* user_omp_threads = getenv("OMP_NUM_THREADS"); if (user_omp_threads == nullptr) { |