author    Xiaoming (Jason) Cui <xiaoming.cui@intel.com>  2018-09-28 19:49:23 -0700
committer Xiaoming (Jason) Cui <xiaoming.cui@intel.com>  2018-09-28 19:49:23 -0700
commit    d936d819752916d3122f02def571ecac9e995029
tree      5b86fe5b82d0ddea4569afe36fb4dbd668b736da
parent    a287961cffcb9ae1a0675f4e18d14674dfae130a
Lower the MklCPUAllocator priority so that TensorFlow falls back to the default allocator when MKL is disabled, along with some minor cleanups
Diffstat (limited to 'tensorflow/core/common_runtime')
 tensorflow/core/common_runtime/mkl_cpu_allocator.h  | 54
 tensorflow/core/common_runtime/process_util.cc      | 37
 tensorflow/core/common_runtime/threadpool_device.cc |  4
 3 files changed, 36 insertions(+), 59 deletions(-)
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 516138d28d..429b19599b 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -27,7 +27,6 @@ limitations under the License.
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/util/util.h"
#include "tensorflow/core/platform/numa.h"
#ifndef INTEL_MKL_DNN_ONLY
@@ -164,12 +163,6 @@ class MklCPUAllocator : public Allocator {
}
Status Initialize() {
- if (DisableMKL()) {
- VLOG(1) << "TF-MKL: Disabling pool allocator";
- tf_disable_pool_allocator_flag_ = true;
- return Status::OK();
- }
-
VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
// Set upper bound on memory allocation to physical RAM available on the
@@ -224,10 +217,6 @@ class MklCPUAllocator : public Allocator {
inline string Name() override { return kName; }
inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
- if (tf_disable_pool_allocator_flag_) {
- return port::AlignedMalloc(num_bytes, alignment);
- }
-
// If the allocation size is less than threshold, call small allocator,
// otherwise call large-size allocator (BFC). We found that BFC allocator
// does not deliver good performance for small allocations when
@@ -238,10 +227,6 @@ class MklCPUAllocator : public Allocator {
}
inline void DeallocateRaw(void* ptr) override {
- if (tf_disable_pool_allocator_flag_) {
- port::AlignedFree(ptr);
- return;
- }
// Check if ptr is for "small" allocation. If it is, then call Free
// directly. Otherwise, call BFC to handle free.
if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
@@ -252,30 +237,26 @@ class MklCPUAllocator : public Allocator {
}
void GetStats(AllocatorStats* stats) override {
- if (!tf_disable_pool_allocator_flag_) {
- AllocatorStats l_stats, s_stats;
- small_size_allocator_->GetStats(&s_stats);
- large_size_allocator_->GetStats(&l_stats);
-
- // Combine statistics from small-size and large-size allocator.
- stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
- stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
- stats->max_bytes_in_use =
- l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
-
- // Since small-size allocations go to MklSmallSizeAllocator,
- // max_alloc_size from large_size_allocator would be the maximum
- // size allocated by MklCPUAllocator.
- stats->max_alloc_size = l_stats.max_alloc_size;
- stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
- }
+ AllocatorStats l_stats, s_stats;
+ small_size_allocator_->GetStats(&s_stats);
+ large_size_allocator_->GetStats(&l_stats);
+
+ // Combine statistics from small-size and large-size allocator.
+ stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
+ stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
+ stats->max_bytes_in_use =
+ l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
+
+ // Since small-size allocations go to MklSmallSizeAllocator,
+ // max_alloc_size from large_size_allocator would be the maximum
+ // size allocated by MklCPUAllocator.
+ stats->max_alloc_size = l_stats.max_alloc_size;
+ stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
}
void ClearStats() override {
- if (!tf_disable_pool_allocator_flag_) {
- small_size_allocator_->ClearStats();
- large_size_allocator_->ClearStats();
- }
+ small_size_allocator_->ClearStats();
+ large_size_allocator_->ClearStats();
}
private:
@@ -314,7 +295,6 @@ class MklCPUAllocator : public Allocator {
// The alignment that we need for the allocations
static constexpr const size_t kAlignment = 64;
- bool tf_disable_pool_allocator_flag_ = false;
Allocator* large_size_allocator_; // owned by this class
MklSmallSizeAllocator* small_size_allocator_; // owned by this class.
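
Note: the hunks above remove the DisableMKL() escape hatches but leave the two-tier design intact: allocations under a size threshold go to MklSmallSizeAllocator, everything else to the BFC allocator, and GetStats() merges the two. Below is a minimal self-contained sketch of that pattern; the 4 KB threshold, the malloc-backed tiers, and the stat accessors are illustrative assumptions, not TensorFlow's real types.

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <unordered_map>

// One tier: a malloc-backed allocator keeping the same statistics that the
// patched GetStats() combines. Stand-in for both MklSmallSizeAllocator and
// the BFC allocator; not thread-safe, unlike the real ones.
class Tier {
 public:
  void* Allocate(size_t bytes) {
    void* p = std::malloc(bytes);
    sizes_[p] = bytes;
    ++num_allocs_;
    bytes_in_use_ += bytes;
    max_alloc_size_ = std::max(max_alloc_size_, bytes);
    return p;
  }
  bool Owns(void* p) const { return sizes_.count(p) != 0; }
  void Free(void* p) {
    bytes_in_use_ -= sizes_.at(p);
    sizes_.erase(p);
    std::free(p);
  }
  size_t num_allocs() const { return num_allocs_; }
  size_t bytes_in_use() const { return bytes_in_use_; }
  size_t max_alloc_size() const { return max_alloc_size_; }

 private:
  std::unordered_map<void*, size_t> sizes_;
  size_t num_allocs_ = 0, bytes_in_use_ = 0, max_alloc_size_ = 0;
};

class TwoTierAllocator {
 public:
  static constexpr size_t kSmallThreshold = 4096;  // assumed cut-off

  void* AllocateRaw(size_t num_bytes) {
    // Small requests bypass the BFC-style tier, which (per the comment in
    // AllocateRaw above) performs poorly for frequent tiny allocations.
    return num_bytes < kSmallThreshold ? small_.Allocate(num_bytes)
                                       : large_.Allocate(num_bytes);
  }
  void DeallocateRaw(void* ptr) {
    // Mirrors IsSmallSizeAllocation(): route the free back to whichever
    // tier issued the pointer.
    if (small_.Owns(ptr)) small_.Free(ptr); else large_.Free(ptr);
  }
  // Combined view, as in the restored GetStats(): counts and bytes are
  // summed, while max_alloc_size comes from the large tier alone because
  // every allocation above the threshold lands there.
  size_t num_allocs() const { return small_.num_allocs() + large_.num_allocs(); }
  size_t bytes_in_use() const { return small_.bytes_in_use() + large_.bytes_in_use(); }
  size_t max_alloc_size() const { return large_.max_alloc_size(); }

 private:
  Tier small_, large_;
};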
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index 60fa601907..b3064a4c08 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -57,28 +57,25 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
const int32 inter_op = options.config.inter_op_parallelism_threads();
if (inter_op != 0) return inter_op;
#ifdef INTEL_MKL
- // Early return if MKL is disabled
- if (DisableMKL())
- return port::NumSchedulableCPUs();
-
- // MKL library executes ops in parallel using OMP threads
- // Set inter_op conservatively to avoid thread oversubscription that could
- // lead to severe perf degradations and OMP resource exhaustion
- int mkl_intra_op = 1;
-#ifdef _OPENMP
- mkl_intra_op = omp_get_max_threads();
-#endif // _OPENMP
- CHECK_GE(mkl_intra_op, 1);
- const int32 mkl_inter_op = std::max(
- (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
- VLOG(0) << "Creating new thread pool with default inter op setting: "
- << mkl_inter_op
- << ". Tune using inter_op_parallelism_threads for best performance.";
- return mkl_inter_op;
-#else
+ if (!DisableMKL()) {
+ // MKL library executes ops in parallel using OMP threads
+ // Set inter_op conservatively to avoid thread oversubscription that could
+ // lead to severe perf degradations and OMP resource exhaustion
+ int mkl_intra_op = 1;
+ #ifdef _OPENMP
+ mkl_intra_op = omp_get_max_threads();
+ #endif // _OPENMP
+ CHECK_GE(mkl_intra_op, 1);
+ const int32 mkl_inter_op = std::max(
+ (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+ VLOG(0) << "Creating new thread pool with default inter op setting: "
+ << mkl_inter_op
+ << ". Tune using inter_op_parallelism_threads for best performance.";
+ return mkl_inter_op;
+ }
+#endif // INTEL_MKL
// Default to using the number of cores available in the process.
return port::NumSchedulableCPUs();
-#endif // INTEL_MKL
}
thread::ThreadPool* NewThreadPoolFromSessionOptions(
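
Note: the default computed inside the !DisableMKL() branch is ceil(NumSchedulableCPUs / omp_get_max_threads()), floored at 2. A small standalone sketch with the two platform calls replaced by plain parameters (the function and parameter names are illustrative):

#include <algorithm>
#include <cstdio>

// The MKL default inter-op computation above, extracted as a pure function.
// num_cpus and omp_max_threads stand in for port::NumSchedulableCPUs() and
// omp_get_max_threads().
int DefaultMklInterOp(int num_cpus, int omp_max_threads) {
  // Ceiling division: one inter-op thread per team of OMP threads that
  // fits on the machine, but never fewer than 2.
  return std::max((num_cpus + omp_max_threads - 1) / omp_max_threads, 2);
}

int main() {
  // A 56-core host with OMP_NUM_THREADS=14 gets 4 inter-op threads;
  // a 4-core host with all cores given to OMP is still floored at 2.
  std::printf("%d\n", DefaultMklInterOp(56, 14));  // 4
  std::printf("%d\n", DefaultMklInterOp(4, 4));    // 2
  return 0;
}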
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 29c01d7f72..f188016610 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -50,7 +50,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
allocator_(allocator),
scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
#ifdef INTEL_MKL
- // Eearly return when MKL is disabled
+ // Early return when MKL is disabled
if (DisableMKL())
return;
#ifdef _OPENMP
@@ -118,7 +118,7 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
};
#ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory);
#endif // ENABLE_MKL
} // namespace
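
Note: the registration change is the core of the patch. Allocator factories are selected by priority, so registering MklCPUAllocator at 50 when DisableMKL() returns true lets the default CPU allocator win, while 200 keeps MklCPUAllocator on top when MKL is enabled. A toy registry illustrating the selection rule; the priority of 100 assumed for DefaultCPUAllocator matches TensorFlow's registration at the time, but the registry itself is a stand-in, not the real AllocatorFactoryRegistry.

#include <cstdio>
#include <map>
#include <string>

// Minimal sketch of priority-based allocator selection: the registry keeps
// one name per priority and the highest priority wins.
struct AllocatorRegistry {
  // priority -> name; std::map keeps keys sorted, so rbegin() is the winner.
  std::map<int, std::string> entries;
  void Register(const std::string& name, int priority) {
    entries[priority] = name;
  }
  const std::string& Best() const { return entries.rbegin()->second; }
};

int main() {
  for (bool disable_mkl : {false, true}) {
    AllocatorRegistry r;
    r.Register("DefaultCPUAllocator", 100);           // assumed baseline
    r.Register("MklCPUAllocator", disable_mkl ? 50 : 200);
    std::printf("DisableMKL=%d -> %s\n", disable_mkl, r.Best().c_str());
  }
  // Prints MklCPUAllocator when MKL is enabled, DefaultCPUAllocator when
  // it is disabled -- the fallback this commit introduces.
  return 0;
}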