diff options
author | Skye Wanderman-Milne <skyewm@google.com> | 2018-04-09 14:26:55 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-04-09 14:29:17 -0700 |
commit | 9b18bd70b5739d646b21b7d45de0e5c96b8cc2a1 (patch) | |
tree | 92b7fc07a773dd20448f936261f1995c208a5ef9 | |
parent | e60c87c978f7fbb848bc66ca3caa90ccdab8a9b9 (diff) |
Don't initialize global threadpool in GraphRunner.
TF_Graph creates a ShapeRefiner, which in
turn creates a GraphRunner. Prior to this change, the GraphRunner would eventually
create a LocalDevice, which initialized the global Eigen threadpool. That
initialization prevented users from specifying a custom number of threads for the
pool via a ConfigProto.
This change introduces a new device class, SingleThreadedCpuDevice, that can
be used for light-weight computations without initializing the threadpool.
Addresses #18300.
PiperOrigin-RevId: 192188031
-rw-r--r-- | tensorflow/core/BUILD | 1 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/eigen_thread_pool.h | 2 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/graph_runner.cc | 21 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/single_threaded_cpu_device.h | 82 |
4 files changed, 93 insertions, 13 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 13b74b852a..c5ca421ced 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2280,6 +2280,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/scoped_allocator.h", "common_runtime/scoped_allocator_mgr.h", "common_runtime/session_factory.h", + "common_runtime/single_threaded_cpu_device.h", "common_runtime/stats_publisher_interface.h", "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", diff --git a/tensorflow/core/common_runtime/eigen_thread_pool.h b/tensorflow/core/common_runtime/eigen_thread_pool.h index c6f13c6a11..ddd627fb20 100644 --- a/tensorflow/core/common_runtime/eigen_thread_pool.h +++ b/tensorflow/core/common_runtime/eigen_thread_pool.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMMON_RUNTIME_EIGEN_THREAD_POOL_H_ #define TENSORFLOW_COMMON_RUNTIME_EIGEN_THREAD_POOL_H_ +#define EIGEN_USE_THREADS + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/lib/core/threadpool.h" diff --git a/tensorflow/core/common_runtime/graph_runner.cc b/tensorflow/core/common_runtime/graph_runner.cc index 1125d2a34a..790f2eaa1e 100644 --- a/tensorflow/core/common_runtime/graph_runner.cc +++ b/tensorflow/core/common_runtime/graph_runner.cc @@ -13,6 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// TODO(skyewm): this is necessary to make the single_threaded_cpu_device.h +// include work. Some other include must be including eigen without defining +// this. Consider defining in this in a BUILD rule. +#define EIGEN_USE_THREADS + #include "tensorflow/core/common_runtime/graph_runner.h" #include "tensorflow/core/common_runtime/device_factory.h" @@ -20,6 +25,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/memory_types.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/common_runtime/single_threaded_cpu_device.h" #include "tensorflow/core/framework/log_memory.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_util.h" @@ -36,18 +42,6 @@ namespace tensorflow { namespace { -std::unique_ptr<Device> GetCPUDevice(Env* env) { - std::vector<Device*> devices; - SessionOptions session_options; - session_options.env = env; - Status s = DeviceFactory::GetFactory(DEVICE_CPU) - ->CreateDevices(session_options, "", &devices); - if (s.ok() && !devices.empty()) { - return std::unique_ptr<Device>(devices[0]); - } - return nullptr; -} - // A simple rendezvous class. // Assumes a single sender and a single receiver, no duplicate sends, and no // sends of dead tensors. @@ -98,7 +92,8 @@ class SimpleRendezvous : public Rendezvous { } // namespace GraphRunner::GraphRunner(Env* env) - : device_deleter_(GetCPUDevice(env)), device_(device_deleter_.get()) {} + : device_deleter_(new SingleThreadedCpuDevice(env)), + device_(device_deleter_.get()) {} GraphRunner::GraphRunner(Device* device) : device_(device) {} GraphRunner::~GraphRunner() {} diff --git a/tensorflow/core/common_runtime/single_threaded_cpu_device.h b/tensorflow/core/common_runtime/single_threaded_cpu_device.h new file mode 100644 index 0000000000..04d5af9087 --- /dev/null +++ b/tensorflow/core/common_runtime/single_threaded_cpu_device.h @@ -0,0 +1,82 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SINGLE_THREADED_CPU_DEVICE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_SINGLE_THREADED_CPU_DEVICE_H_ + +#define EIGEN_USE_THREADS + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/eigen_thread_pool.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/lib/core/threadpool.h" + +namespace tensorflow { + +class Env; + +// A simple single-threaded CPU device. This can be used to run inexpensive +// computations. In particular, using this avoids initializing the global thread +// pools in LocalDevice. 
+class SingleThreadedCpuDevice : public Device { + public: + SingleThreadedCpuDevice(Env* env) + : Device(env, Device::BuildDeviceAttributes("/device:CPU:0", DEVICE_CPU, + Bytes(256 << 20), + DeviceLocality())) { + eigen_worker_threads_.num_threads = 1; + eigen_worker_threads_.workers = new thread::ThreadPool( + env, "graph_runner", eigen_worker_threads_.num_threads); + eigen_threadpool_wrapper_.reset( + new EigenThreadPoolWrapper(eigen_worker_threads_.workers)); + eigen_device_.reset(new Eigen::ThreadPoolDevice( + eigen_threadpool_wrapper_.get(), eigen_worker_threads_.num_threads)); + set_tensorflow_cpu_worker_threads(&eigen_worker_threads_); + set_eigen_cpu_device(eigen_device_.get()); + } + + ~SingleThreadedCpuDevice() override { + eigen_threadpool_wrapper_.reset(); + eigen_device_.reset(); + delete eigen_worker_threads_.workers; + } + + Status Sync() override { return Status::OK(); } + + Status MakeTensorFromProto(const TensorProto& tensor_proto, + const AllocatorAttributes alloc_attrs, + Tensor* tensor) override { + Tensor parsed(tensor_proto.dtype()); + if (!parsed.FromProto(cpu_allocator(), tensor_proto)) { + return errors::InvalidArgument("Cannot parse tensor from tensor_proto."); + } + *tensor = parsed; + return Status::OK(); + } + + Allocator* GetAllocator(AllocatorAttributes attr) override { + return cpu_allocator(); + } + + private: + DeviceBase::CpuWorkerThreads eigen_worker_threads_; + std::unique_ptr<Eigen::ThreadPoolInterface> eigen_threadpool_wrapper_; + std::unique_ptr<Eigen::ThreadPoolDevice> eigen_device_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SINGLE_THREADED_CPU_DEVICE_H_ |