aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Skye Wanderman-Milne <skyewm@google.com>2018-04-09 14:26:55 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-04-09 14:29:17 -0700
commit9b18bd70b5739d646b21b7d45de0e5c96b8cc2a1 (patch)
tree92b7fc07a773dd20448f936261f1995c208a5ef9
parente60c87c978f7fbb848bc66ca3caa90ccdab8a9b9 (diff)
Don't initialize global threadpool in GraphRunner.
TF_Graph creates a ShapeRefiner, which in turn creates a GraphRunner, which prior to this change would eventually create a LocalDevice that initialized the global eigen threadpool. This prevents users from specifying a custom number of threads for the pool via a ConfigProto. This change introduces a new device class, SingleThreadedCpuDevice, that can be used for light-weight computations without initializing the threadpool. Addresses #18300. PiperOrigin-RevId: 192188031
-rw-r--r--tensorflow/core/BUILD1
-rw-r--r--tensorflow/core/common_runtime/eigen_thread_pool.h2
-rw-r--r--tensorflow/core/common_runtime/graph_runner.cc21
-rw-r--r--tensorflow/core/common_runtime/single_threaded_cpu_device.h82
4 files changed, 93 insertions, 13 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 13b74b852a..c5ca421ced 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2280,6 +2280,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
"common_runtime/scoped_allocator.h",
"common_runtime/scoped_allocator_mgr.h",
"common_runtime/session_factory.h",
+ "common_runtime/single_threaded_cpu_device.h",
"common_runtime/stats_publisher_interface.h",
"common_runtime/step_stats_collector.h",
"common_runtime/threadpool_device.h",
diff --git a/tensorflow/core/common_runtime/eigen_thread_pool.h b/tensorflow/core/common_runtime/eigen_thread_pool.h
index c6f13c6a11..ddd627fb20 100644
--- a/tensorflow/core/common_runtime/eigen_thread_pool.h
+++ b/tensorflow/core/common_runtime/eigen_thread_pool.h
@@ -16,6 +16,8 @@ limitations under the License.
#ifndef TENSORFLOW_COMMON_RUNTIME_EIGEN_THREAD_POOL_H_
#define TENSORFLOW_COMMON_RUNTIME_EIGEN_THREAD_POOL_H_
+#define EIGEN_USE_THREADS
+
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/lib/core/threadpool.h"
diff --git a/tensorflow/core/common_runtime/graph_runner.cc b/tensorflow/core/common_runtime/graph_runner.cc
index 1125d2a34a..790f2eaa1e 100644
--- a/tensorflow/core/common_runtime/graph_runner.cc
+++ b/tensorflow/core/common_runtime/graph_runner.cc
@@ -13,6 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+// TODO(skyewm): this is necessary to make the single_threaded_cpu_device.h
+// include work. Some other include must be including eigen without defining
+// this. Consider defining this in a BUILD rule.
+#define EIGEN_USE_THREADS
+
#include "tensorflow/core/common_runtime/graph_runner.h"
#include "tensorflow/core/common_runtime/device_factory.h"
@@ -20,6 +25,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/function.h"
#include "tensorflow/core/common_runtime/memory_types.h"
#include "tensorflow/core/common_runtime/rendezvous_mgr.h"
+#include "tensorflow/core/common_runtime/single_threaded_cpu_device.h"
#include "tensorflow/core/framework/log_memory.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_util.h"
@@ -36,18 +42,6 @@ namespace tensorflow {
namespace {
-std::unique_ptr<Device> GetCPUDevice(Env* env) {
- std::vector<Device*> devices;
- SessionOptions session_options;
- session_options.env = env;
- Status s = DeviceFactory::GetFactory(DEVICE_CPU)
- ->CreateDevices(session_options, "", &devices);
- if (s.ok() && !devices.empty()) {
- return std::unique_ptr<Device>(devices[0]);
- }
- return nullptr;
-}
-
// A simple rendezvous class.
// Assumes a single sender and a single receiver, no duplicate sends, and no
// sends of dead tensors.
@@ -98,7 +92,8 @@ class SimpleRendezvous : public Rendezvous {
} // namespace
GraphRunner::GraphRunner(Env* env)
- : device_deleter_(GetCPUDevice(env)), device_(device_deleter_.get()) {}
+ : device_deleter_(new SingleThreadedCpuDevice(env)),
+ device_(device_deleter_.get()) {}
GraphRunner::GraphRunner(Device* device) : device_(device) {}
GraphRunner::~GraphRunner() {}
diff --git a/tensorflow/core/common_runtime/single_threaded_cpu_device.h b/tensorflow/core/common_runtime/single_threaded_cpu_device.h
new file mode 100644
index 0000000000..04d5af9087
--- /dev/null
+++ b/tensorflow/core/common_runtime/single_threaded_cpu_device.h
@@ -0,0 +1,82 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SINGLE_THREADED_CPU_DEVICE_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_SINGLE_THREADED_CPU_DEVICE_H_
+
+#define EIGEN_USE_THREADS
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+
+namespace tensorflow {
+
+class Env;
+
+// A simple single-threaded CPU device. This can be used to run inexpensive
+// computations. In particular, using this avoids initializing the global thread
+// pools in LocalDevice.
+//
+// The device creates its own one-thread pool and wires it into the base class
+// as both the TensorFlow CPU worker threads and the Eigen CPU device. All
+// allocations go through cpu_allocator().
+class SingleThreadedCpuDevice : public Device {
+ public:
+  // Registers the device as "/device:CPU:0" of type DEVICE_CPU with a
+  // Bytes(256 << 20) (256 MiB) attribute, then builds a private thread pool
+  // named "graph_runner" containing exactly one thread.
+  //
+  // Marked `explicit` so that an Env* is never implicitly converted into a
+  // device object.
+  explicit SingleThreadedCpuDevice(Env* env)
+      : Device(env, Device::BuildDeviceAttributes("/device:CPU:0", DEVICE_CPU,
+                                                  Bytes(256 << 20),
+                                                  DeviceLocality())) {
+    eigen_worker_threads_.num_threads = 1;
+    eigen_worker_threads_.workers = new thread::ThreadPool(
+        env, "graph_runner", eigen_worker_threads_.num_threads);
+    // The wrapper is given a raw pointer to the pool created above.
+    eigen_threadpool_wrapper_.reset(
+        new EigenThreadPoolWrapper(eigen_worker_threads_.workers));
+    // The Eigen device is given a raw pointer to the wrapper created above.
+    eigen_device_.reset(new Eigen::ThreadPoolDevice(
+        eigen_threadpool_wrapper_.get(), eigen_worker_threads_.num_threads));
+    set_tensorflow_cpu_worker_threads(&eigen_worker_threads_);
+    set_eigen_cpu_device(eigen_device_.get());
+  }
+
+  ~SingleThreadedCpuDevice() override {
+    // Tear down in reverse order of construction, so that no object outlives
+    // the one it was handed a raw pointer to: Eigen device, then the wrapper,
+    // then the thread pool itself.
+    eigen_device_.reset();
+    eigen_threadpool_wrapper_.reset();
+    delete eigen_worker_threads_.workers;
+  }
+
+  // Nothing to wait for; always reports success.
+  Status Sync() override { return Status::OK(); }
+
+  // Parses `tensor_proto` into `*tensor` using cpu_allocator().
+  // `alloc_attrs` is not consulted. Returns InvalidArgument if the proto
+  // cannot be parsed; `*tensor` is only assigned on success.
+  Status MakeTensorFromProto(const TensorProto& tensor_proto,
+                             const AllocatorAttributes alloc_attrs,
+                             Tensor* tensor) override {
+    Tensor parsed(tensor_proto.dtype());
+    if (!parsed.FromProto(cpu_allocator(), tensor_proto)) {
+      return errors::InvalidArgument("Cannot parse tensor from tensor_proto.");
+    }
+    *tensor = parsed;
+    return Status::OK();
+  }
+
+  // Returns cpu_allocator() regardless of the requested attributes.
+  Allocator* GetAllocator(AllocatorAttributes attr) override {
+    return cpu_allocator();
+  }
+
+ private:
+  // Holds the thread count and the raw, manually deleted thread pool pointer.
+  DeviceBase::CpuWorkerThreads eigen_worker_threads_;
+  // Wrapper around eigen_worker_threads_.workers, passed to eigen_device_.
+  std::unique_ptr<Eigen::ThreadPoolInterface> eigen_threadpool_wrapper_;
+  // Eigen device built on eigen_threadpool_wrapper_.
+  std::unique_ptr<Eigen::ThreadPoolDevice> eigen_device_;
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SINGLE_THREADED_CPU_DEVICE_H_