diff options
author | 2017-11-12 16:47:56 -0800 | |
---|---|---|
committer | 2017-11-12 16:52:38 -0800 | |
commit | 08b3d55b65bb8cf621e4f3e9f25cc7779079c4e8 (patch) | |
tree | 66fc764f5c20cb8d2f5ae597cbbf6fc76de30455 | |
parent | 24e368f1a1f1edd4c1f6c13b165c8aa5057c7f11 (diff) |
[SE] Delete deprecated MachineManager.
PiperOrigin-RevId: 175472763
-rw-r--r-- | tensorflow/stream_executor/machine_manager.cc | 291 | ||||
-rw-r--r-- | tensorflow/stream_executor/machine_manager.h | 212 |
2 files changed, 0 insertions, 503 deletions
diff --git a/tensorflow/stream_executor/machine_manager.cc b/tensorflow/stream_executor/machine_manager.cc deleted file mode 100644 index 2b61c8a0bc..0000000000 --- a/tensorflow/stream_executor/machine_manager.cc +++ /dev/null @@ -1,291 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/stream_executor/machine_manager.h" - -#include "tensorflow/stream_executor/platform/port.h" - -#include "tensorflow/stream_executor/dso_loader.h" -#include "tensorflow/stream_executor/lib/error.h" -#include "tensorflow/stream_executor/platform/logging.h" -#include "tensorflow/stream_executor/platform/mutex.h" -#include "tensorflow/stream_executor/platform/port.h" - -namespace perftools { -namespace gputools { - -mutex MachineManager::mu_{LINKER_INITIALIZED}; - -MachineManager *MachineManager::singleton_ = nullptr; - -PlatformKind MachineManager::DetectPreferredPlatform() { -// TODO(leary) for KNC card experiments, figure out a legitimate way to -// determine this. For now, we use a compile-time hint so we can compile tests -// for both. -#if defined TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_PREFER_OPENCL - return PlatformKind::kOpenCL; -#elif defined TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_PREFER_HOST - return PlatformKind::kHost; -#else - return PlatformKind::kCuda; -#endif -} - -/* static */ port::StatusOr<std::unique_ptr<MachineManager>> -MachineManager::Create(PlatformKind kind, DeviceOptions options, - const PluginConfig &config) { - std::unique_ptr<MachineManager> machine_manager{ - new MachineManager{kind, options, config}}; - auto init_status = machine_manager->Init(); - if (!init_status.ok()) { - return init_status; - } - - return std::move(machine_manager); -} - -MachineManager::MachineManager(PlatformKind platform, - DeviceOptions device_options, - const PluginConfig &config) - : platform_(platform), - device_options_(device_options), - plugin_config_(config), - min_numa_node_(0), - limit_numa_node_(0) {} - -port::Status MachineManager::Init() { - // Initialize the first StreamExecutor, then use that platform interface to - // grab the device count. - executors_.resize(1); - executors_[0].reset(new StreamExecutor{platform_, plugin_config_}); - auto status = executors_[0]->Init(0 /* = device_ordinal */, device_options_); - if (!status.ok()) { - return port::Status{ - port::error::FAILED_PRECONDITION, - port::StrCat( - "failed to initialize StreamExecutor for device ordinal 0: ", - status.ToString())}; - } - int device_count = executors_[0]->PlatformDeviceCount(); - if (device_count == 0) { - LOG(WARNING) << "no devices found for platform " - << PlatformKindString(platform_); - min_numa_node_ = limit_numa_node_ = 0; - return port::Status::OK(); - } - - streams_.resize(device_count); - streams_[0].reset(new Stream(executors_[0].get())); - if (!streams_[0]->Init().ok()) { - return port::Status{ - port::error::FAILED_PRECONDITION, - "failed to initialize default stream for device ordinal 0"}; - } - - min_numa_node_ = executors_[0]->GetDeviceDescription().numa_node(); - limit_numa_node_ = min_numa_node_ + 1; - - executors_.resize(device_count); - for (int device_ordinal = 1; device_ordinal < device_count; - ++device_ordinal) { - StreamExecutor *stream_exec = new StreamExecutor{platform_, plugin_config_}; - executors_[device_ordinal].reset(stream_exec); - auto status = stream_exec->Init(device_ordinal, device_options_); - if (!status.ok()) { - return port::Status( - port::error::FAILED_PRECONDITION, - port::StrCat( - "failed to initialize StreamExecutor for device ordinal ", - device_ordinal, ": ", status.ToString())); - } - - min_numa_node_ = std::min(min_numa_node_, - stream_exec->GetDeviceDescription().numa_node()); - limit_numa_node_ = std::max( - limit_numa_node_, stream_exec->GetDeviceDescription().numa_node() + 1); - - if (!stream_exec->GetDeviceDescription().ecc_enabled()) { - LOG(WARNING) << "ECC not enabled for device ordinal: " << device_ordinal; - } - - streams_[device_ordinal].reset( - new Stream(executors_[device_ordinal].get())); - if (!streams_[device_ordinal]->Init().ok()) { - return port::Status( - port::error::FAILED_PRECONDITION, - port::StrCat( - "failed to initialize default stream for device ordinal ", - device_ordinal)); - } - } - - return port::Status::OK(); -} - -int MachineManager::device_count() const { return executors_.size(); } - -port::Status MachineManager::EnablePeerAccess() { - auto peer_access_map = GetPeerAccessMap(); - for (const auto &access : *peer_access_map) { - auto devices = access.first; - if (access.second) { - StreamExecutor *from = executors_[devices.first].get(); - StreamExecutor *to = executors_[devices.second].get(); - auto status = from->EnablePeerAccessTo(to); - if (!status.ok()) { - return status; - } - } else { - LOG(INFO) << "cannot enable peer access from device ordinal " - << devices.first << " to device ordinal " << devices.second; - } - } - return port::Status::OK(); -} - -std::unique_ptr<std::map<std::pair<int, int>, bool>> -MachineManager::GetPeerAccessMap() { - auto *map = new std::map<std::pair<int, int>, bool>; - for (int i = 0; i < device_count(); ++i) { - for (int j = 0; j < device_count(); ++j) { - StreamExecutor *from = executors_[i].get(); - StreamExecutor *to = executors_[j].get(); - (*map)[{i, j}] = from->CanEnablePeerAccessTo(to); - } - } - - return std::unique_ptr<std::map<std::pair<int, int>, bool>>{map}; -} - -StreamExecutor *MachineManager::executor_for_device(int device_ordinal) const { - CHECK_GE(device_ordinal, 0) << "device ordinal must be non-negative"; - CHECK(0 <= device_ordinal && device_ordinal < device_count()) - << "device " << device_ordinal << " out of range with device count " - << device_count(); - StreamExecutor *executor = executors_[device_ordinal].get(); - CHECK(executor != nullptr); - return executor; -} - -int MachineManager::ExecutorToBus(const StreamExecutor *stream_exec) const { - return stream_exec->GetDeviceDescription().numa_node() - min_numa_node_; -} - -int MachineManager::DeviceToBus(int device_ordinal) const { - return ExecutorToBus(executor_for_device(device_ordinal)); -} - -int MachineManager::ExecutorToNumaNode( - const StreamExecutor *stream_exec) const { - return stream_exec->GetDeviceDescription().numa_node(); -} - -int MachineManager::DeviceToNumaNode(int device_ordinal) const { - return ExecutorToNumaNode(executor_for_device(device_ordinal)); -} - -StreamExecutor *MachineManager::first_executor_for_bus(int bus_ordinal) { - CHECK_LT(bus_ordinal, bus_count()) << "bus ordinal out of available range"; - for (auto &executor : executors_) { - if (ExecutorToBus(executor.get()) == bus_ordinal) { - return executor.get(); - } - } - - LOG(WARNING) << "could not find executor requested for bus ordinal: " - << bus_ordinal; - return nullptr; -} - -StreamExecutor *MachineManager::first_executor_for_numa_node(int numa_node) { - for (auto &executor : executors_) { - if (ExecutorToNumaNode(executor.get()) == numa_node) { - return executor.get(); - } - } - - LOG(WARNING) << "could not find executor requested for numa_node: " - << numa_node; - return nullptr; -} - -Stream *MachineManager::stream_for_device(int device_ordinal) { - CHECK(0 <= device_ordinal && device_ordinal < device_count()); - Stream *stream = streams_[device_ordinal].get(); - CHECK(stream != nullptr); - return stream; -} - -/* static */ port::StatusOr<MachineManager *> -MachineManager::CreateSingletonInternal(PlatformKind platform, - DeviceOptions options, - const PluginConfig &config) { - if (singleton_ != nullptr) { - return port::Status{ - port::error::ALREADY_EXISTS, - "cannot create machine manager singleton; one already exists"}; - } - - auto create_status = Create(platform, options, config); - if (!create_status.ok()) { - return create_status.status(); - } - - singleton_ = create_status.ConsumeValueOrDie().release(); - - VLOG(1) << "machine manager singleton is " << singleton_ << " with platform " - << PlatformKindString(platform) << " and device options " - << options.ToString(); - - return singleton_; -} - -/* static */ MachineManager *MachineManager::CreateSingletonOrDie( - PlatformKind platform, DeviceOptions options, const PluginConfig &config) { - auto status = CreateSingleton(platform, options, config); - if (!status.ok()) { - LOG(FATAL) << "failed to create MachineManager singleton: " - << status.status(); - } - return status.ValueOrDie(); -} - -/* static */ port::StatusOr<MachineManager *> MachineManager::CreateSingleton( - PlatformKind platform, DeviceOptions device_options, - const PluginConfig &config) { - mutex_lock lock{mu_}; - return CreateSingletonInternal(platform, device_options, config); -} - -/* static */ MachineManager *MachineManager::singleton() { - mutex_lock lock{mu_}; - if (singleton_ == nullptr) { - PlatformKind platform = DetectPreferredPlatform(); - DeviceOptions options = DeviceOptions::Default(); - auto status = CreateSingletonInternal(platform, options, PluginConfig()); - if (!status.ok()) { - LOG(FATAL) - << "failed to create MachineManager singleton: " - "singleton accessor attempted lazy construction but failed: " - << status.status(); - } - return status.ValueOrDie(); - } - - return singleton_; -} - -} // namespace gputools -} // namespace perftools diff --git a/tensorflow/stream_executor/machine_manager.h b/tensorflow/stream_executor/machine_manager.h deleted file mode 100644 index 65396dd1ff..0000000000 --- a/tensorflow/stream_executor/machine_manager.h +++ /dev/null @@ -1,212 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This interface provides a machine-wide resource management singleton -// interface as a convenience for users who will want to exploit all of the GPU -// resources present on the system. -// -// To use the singleton interface: -// -// // At start of program or in your module initializer. -// // Do not call this with different sets of arguments! -// MachineManager::CreateSingletonOrDie( -// MachineManager::DetectPreferredPlatform(), DeviceOptions::Default()); -// -// // At any point after that, this convenience interface avoids you having to -// // pass those two parameters: -// StreamExecutor *device0_executor = -// MachineManager::singleton()->executor_for_device(0 /* = ordinal */); -// ... - -// ----------------- THIS CLASS IS DEPRECATED - DO NOT USE ------------------ -// This class is not suitable for open-sourcing, as it does not support -// plugins and depends on hardcoded PlatformKind enums. MultiPlatformManager and -// Platform plugins are the replacements. -// ----------------- THIS CLASS IS DEPRECATED - DO NOT USE ------------------ - -#ifndef TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_H_ -#define TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_H_ - -#include <map> -#include <memory> -#include <utility> -#include <vector> - -#include "tensorflow/stream_executor/device_options.h" // IWYU pragma: export -#include "tensorflow/stream_executor/lib/status.h" -#include "tensorflow/stream_executor/lib/statusor.h" -#include "tensorflow/stream_executor/platform/thread_annotations.h" -#include "tensorflow/stream_executor/stream.h" -#include "tensorflow/stream_executor/stream_executor.h" - -namespace perftools { -namespace gputools { - -// MachineManager is used to instantiate and manage singleton resources for -// all the GPUs present on a machine. This basically amounts to having a -// StreamExecutor-per-device pool. -// -// Thread-safe. -class MachineManager { - public: - // Inspects the host to determine the preferred GPU execution platform. - // To force OpenCL from a build target on a machine that has both OpenCL and - // CUDA capabilities, link against the :stream_executor_prefer_opencl target. - static PlatformKind DetectPreferredPlatform(); - - // Returns the machine manager singleton. - // If the singleton has not yet been created when this is invoked, this - // creates it with resonable default options, otherwise it returns the - // already-created singleton. If there are errors during creation, this call - // will terminate the program. - static MachineManager *singleton(); - - // Returns a singleton instance of the machine manager -- it's generally - // assumed that users will have one of these for a real-world application as a - // form of resource manager. - // - // This should only be called once, at the initialization of an application, - // if at all -- MachineManager::singleton() will return a value with sensible - // default as determined by DetectPreferredPlatform. Attempts to create the - // singleton with options multiple times will result in an error. - static port::StatusOr<MachineManager *> CreateSingleton( - PlatformKind platform, DeviceOptions device_options, - const PluginConfig &config = PluginConfig()); - - // Convenience "or die" wrapper around the above call. - static MachineManager *CreateSingletonOrDie( - PlatformKind platform, DeviceOptions device_options, - const PluginConfig &config = PluginConfig()); - - // Creates a new instantiation of the MachineManager. - // Warning: generally users will want to use the singleton form, see - // MachineManager::singleton(). - // - // The machine manager has a number of devices that it detects on creation - // that does not change over the course of its lifetime. This does not support - // things like hot-plugging of GPUs or the event of GPUs dropping off the bus - // in a recoverable manner. - static port::StatusOr<std::unique_ptr<MachineManager>> Create( - PlatformKind kind, DeviceOptions options, - const PluginConfig &config = PluginConfig()); - - // Returns the number of devices visible to the machine manager. - int device_count() const; - - // Returns the StreamExecutor for one of the machine-manager visible devices. - // Checks that device_ordinal is within device_count() bound. - StreamExecutor *executor_for_device(int device_ordinal) const; - - // Returns the bus ordinal count (as determined by the span of NUMA nodes - // associated with the available devices). - int bus_count() const { return limit_numa_node_ - min_numa_node_; } - - // Returns the bus ordinal associated with a given device ordinal. - int DeviceToBus(int device_ordinal) const; - - // Returns the NUMA node associated with a given device ordinal. - int DeviceToNumaNode(int device_ordinal) const; - - // Returns the first StreamExecutor (within device_count() ordinals that has - // the corresponding bus ordinal, or nullptr if none is found. - // - // The valid bus ordinals can be enumerated by scanning through the executors - // and seeing what bus number they are on. - StreamExecutor *first_executor_for_bus(int bus_ordinal); - - // Returns the first StreamExecutor associated with the specified - // numa_node, or nullptr if none is found. - StreamExecutor *first_executor_for_numa_node(int numa_node); - - // Returns the default stream for the default executor (that returned by - // executor_for_device()). The same stream will be returned for all calls to - // stream_for_device() (with the same device_ordinal). - Stream *stream_for_device(int device_ordinal); - - // Returns the platform that this machine manager was created to target. - PlatformKind platform() const { return platform_; } - - // Enables peer access between all possible devices on this platform. - // Only dies due to failure to enable peer access for devices in which - // GetPeerAccessMap() is true. - port::Status EnablePeerAccess(); - - // Returns a map that says, for pairs (device ordinal i, device ordinal j), - // whether i can access j's memory space. - std::unique_ptr<std::map<std::pair<int, int>, bool>> GetPeerAccessMap(); - - private: - // Guts of the singleton creation mechanism that requires the exclusive - // singleton lock to be held, in order to prevent deadlock due to method - // composition. - static port::StatusOr<MachineManager *> CreateSingletonInternal( - PlatformKind platform, DeviceOptions options, const PluginConfig &config) - EXCLUSIVE_LOCKS_REQUIRED(mu_); - - // Private constructor used in singleton creation. - MachineManager(PlatformKind platform, DeviceOptions options, - const PluginConfig &config); - - // Populates the executors_ vector with an executor per observable device - // ordinal on the platform. Logs and returns false if any of the - // Stream Executors cannot be created. - port::Status Init(); - - // Converts a StreamExecutor's NUMA node association into a bus ordinal for - // this machine. - int ExecutorToBus(const StreamExecutor *stream_exec) const; - - // Returns the NUMA node association for the StreamExecutor. - int ExecutorToNumaNode(const StreamExecutor *stream_exec) const; - - // Mutex that guards the initialization of the machine manager static - // variable. - static mutex mu_; - - // Singleton MachineManager value -- assignment to this is protected by a - // static singleton guard clause. - static MachineManager *singleton_ GUARDED_BY(mu_); - - // Holds an executor associated with each device ordinal present in the - // system, which are the indices. Immutable after initialization. - std::vector<std::unique_ptr<StreamExecutor>> executors_; - - // Holds an stream associated with each device ordinal present in the - // system, which are the indices. Immutable after initialization. - std::vector<std::unique_ptr<Stream>> streams_; - - // The platform that this is managing for the machine. - PlatformKind platform_; - - // Options used to create StreamExecutors on each of the respective devices. - DeviceOptions device_options_; - - // Plugin configuration to use for all StreamExecutors created by this object. - PluginConfig plugin_config_; - - // The smallest NUMA node value for any device managed by this machine - // manager. Used, along with limit_numa_node_, to convert NUMA nodes into bus - // ordinals. The NUMA node space occupied by GPUs is assumed to be dense. - int min_numa_node_; - - // Larger than the NUMA node value for any device managed by this machine - // manager. - int limit_numa_node_; -}; - -} // namespace gputools -} // namespace perftools - -#endif // TENSORFLOW_STREAM_EXECUTOR_MACHINE_MANAGER_H_ |