aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/common_runtime
diff options
context:
space:
mode:
authorGravatar Xiaoming (Jason) Cui <xiaoming.cui@intel.com>2018-09-25 00:42:42 -0700
committerGravatar Xiaoming (Jason) Cui <xiaoming.cui@intel.com>2018-09-28 12:23:18 -0700
commitb5feceb9058e06eac3de86ec45c44f5637054855 (patch)
tree08d828f1033dacec9d476a3b9a5b88c3af13622d /tensorflow/core/common_runtime
parent986193d79e00f1780fb3278ed890a72f7285f66e (diff)
Added a feature to disable TensorFlow's MKL support by setting the environment variable TF_DISABLE_MKL=1
Diffstat (limited to 'tensorflow/core/common_runtime')
-rw-r--r--tensorflow/core/common_runtime/mkl_cpu_allocator.h54
-rw-r--r--tensorflow/core/common_runtime/process_util.cc5
-rw-r--r--tensorflow/core/common_runtime/threadpool_device.cc4
3 files changed, 46 insertions, 17 deletions
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 429b19599b..516138d28d 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/mem.h"
+#include "tensorflow/core/util/util.h"
#include "tensorflow/core/platform/numa.h"
#ifndef INTEL_MKL_DNN_ONLY
@@ -163,6 +164,12 @@ class MklCPUAllocator : public Allocator {
}
Status Initialize() {
+ if (DisableMKL()) {
+ VLOG(1) << "TF-MKL: Disabling pool allocator";
+ tf_disable_pool_allocator_flag_ = true;
+ return Status::OK();
+ }
+
VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
// Set upper bound on memory allocation to physical RAM available on the
@@ -217,6 +224,10 @@ class MklCPUAllocator : public Allocator {
inline string Name() override { return kName; }
inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
+ if (tf_disable_pool_allocator_flag_) {
+ return port::AlignedMalloc(num_bytes, alignment);
+ }
+
// If the allocation size is less than threshold, call small allocator,
// otherwise call large-size allocator (BFC). We found that BFC allocator
// does not deliver good performance for small allocations when
@@ -227,6 +238,10 @@ class MklCPUAllocator : public Allocator {
}
inline void DeallocateRaw(void* ptr) override {
+ if (tf_disable_pool_allocator_flag_) {
+ port::AlignedFree(ptr);
+ return;
+ }
// Check if ptr is for "small" allocation. If it is, then call Free
// directly. Otherwise, call BFC to handle free.
if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
@@ -237,26 +252,30 @@ class MklCPUAllocator : public Allocator {
}
void GetStats(AllocatorStats* stats) override {
- AllocatorStats l_stats, s_stats;
- small_size_allocator_->GetStats(&s_stats);
- large_size_allocator_->GetStats(&l_stats);
-
- // Combine statistics from small-size and large-size allocator.
- stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
- stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
- stats->max_bytes_in_use =
- l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
-
- // Since small-size allocations go to MklSmallSizeAllocator,
- // max_alloc_size from large_size_allocator would be the maximum
- // size allocated by MklCPUAllocator.
- stats->max_alloc_size = l_stats.max_alloc_size;
- stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+ if (!tf_disable_pool_allocator_flag_) {
+ AllocatorStats l_stats, s_stats;
+ small_size_allocator_->GetStats(&s_stats);
+ large_size_allocator_->GetStats(&l_stats);
+
+ // Combine statistics from small-size and large-size allocator.
+ stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
+ stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
+ stats->max_bytes_in_use =
+ l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
+
+ // Since small-size allocations go to MklSmallSizeAllocator,
+ // max_alloc_size from large_size_allocator would be the maximum
+ // size allocated by MklCPUAllocator.
+ stats->max_alloc_size = l_stats.max_alloc_size;
+ stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+ }
}
void ClearStats() override {
- small_size_allocator_->ClearStats();
- large_size_allocator_->ClearStats();
+ if (!tf_disable_pool_allocator_flag_) {
+ small_size_allocator_->ClearStats();
+ large_size_allocator_->ClearStats();
+ }
}
private:
@@ -295,6 +314,7 @@ class MklCPUAllocator : public Allocator {
// The alignment that we need for the allocations
static constexpr const size_t kAlignment = 64;
+ bool tf_disable_pool_allocator_flag_ = false;
Allocator* large_size_allocator_; // owned by this class
MklSmallSizeAllocator* small_size_allocator_; // owned by this class.
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index a5d31b75c7..60fa601907 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -28,6 +28,7 @@ limitations under the License.
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/util.h"
namespace tensorflow {
@@ -56,6 +57,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
const int32 inter_op = options.config.inter_op_parallelism_threads();
if (inter_op != 0) return inter_op;
#ifdef INTEL_MKL
+ // Early return if MKL is disabled
+ if (DisableMKL())
+ return port::NumSchedulableCPUs();
+
// MKL library executes ops in parallel using OMP threads
// Set inter_op conservatively to avoid thread oversubscription that could
// lead to severe perf degradations and OMP resource exhaustion
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 8587d1783a..29c01d7f72 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/util.h"
#ifdef INTEL_MKL
#ifdef _OPENMP
@@ -49,6 +50,9 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
allocator_(allocator),
scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
#ifdef INTEL_MKL
+ // Early return when MKL is disabled
+ if (DisableMKL())
+ return;
#ifdef _OPENMP
const char* user_omp_threads = getenv("OMP_NUM_THREADS");
if (user_omp_threads == nullptr) {