aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Vijay Vasudevan <vrv@google.com> 2016-05-02 19:40:48 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-05-02 20:51:32 -0700
commit: ac959c8a942b4f9202dbd6d8bffc6fcf7b096695 (patch)
tree: 3bf14f1a93dbf9ae15df9ed280656a381a73a68a
parent: 0f70b8e4b72109a2f99a6235da2f17ec142040ad (diff)
TensorFlow: Move assignment / choice of device in SimplePlacer out of
AssignDevice and into the main loop, for a future refactor where we inject the choice of algorithm to use when selecting a device for a node. Currently this continues to use just the first device in the list, but we would like to be able to play around with algorithms that choose alternative strategies, perhaps based on other heuristics and runtime information. SimplePlacer remains the code that performs the precondition filters (hard-device assignment and validation), so that other placement algorithms don't have to worry about enforcing the correct assignments / conditions. Change: 121339923
-rw-r--r-- tensorflow/core/common_runtime/simple_placer.cc 236
-rw-r--r-- tensorflow/core/common_runtime/simple_placer.h 5
2 files changed, 131 insertions, 110 deletions
diff --git a/tensorflow/core/common_runtime/simple_placer.cc b/tensorflow/core/common_runtime/simple_placer.cc
index 9dd4981b5f..19a44c6a98 100644
--- a/tensorflow/core/common_runtime/simple_placer.cc
+++ b/tensorflow/core/common_runtime/simple_placer.cc
@@ -259,129 +259,125 @@ class ColocationGraph {
// For the given node, subject to the constraints previously given
// to this ColocationGraph, set its assigned_device_name. Returns OK
// if a satisfying device can be found, otherwise an error.
- Status AssignDevice(Node* node) {
- int node_root = FindRoot(node->id());
- if (members_[node_root].assigned_device == nullptr) {
- // We have not yet assigned a device for the colocated node set containing
- // n, so we do so now using the constraints on the root node.
+ Status GetDevicesForNode(Node* node, std::vector<Device*>* possible_devices) {
+ possible_devices->clear();
+ const int node_root = FindRoot(node->id());
+ if (!members_[node_root].possible_devices.empty()) {
+ *possible_devices = members_[node_root].possible_devices;
+ return Status::OK();
+ }
- // "devices" will contain the set of feasible placements for the
- // colocated node set containing n.
- std::vector<Device*> devices;
- if (DeviceNameUtils::HasSomeDetails(members_[node_root].device_name)) {
- // The root node has a (possibly partial) device
- // specification, so enumerate the physical devices that
- // conform to it.
- device_set_->FindMatchingDevices(members_[node_root].device_name,
- &devices);
+ // We have not yet computed the possible devices for the
+ // colocated node set containing 'node', so we do so now using the
+ // constraints on the root node.
+
+ // "devices" will contain the set of feasible placements for the
+ // colocated node set containing 'node'.
+ std::vector<Device*> devices;
+ if (DeviceNameUtils::HasSomeDetails(members_[node_root].device_name)) {
+ // The root node has a (possibly partial) device
+ // specification, so enumerate the physical devices that
+ // conform to it.
+ device_set_->FindMatchingDevices(members_[node_root].device_name,
+ &devices);
+
+ if (!devices.empty()) {
+ // Filter devices into those that are compatible with the root
+ // node (and its children).
+ devices = FilterSupportedDevices(
+ devices, members_[node_root].supported_device_types);
+ }
+ // Perform soft placement if allow_soft_placement is set. options_
+ // being NULL is treated as allowing soft placement.
+ if (devices.empty() &&
+ (options_ == nullptr || options_->config.allow_soft_placement())) {
+ // The soft_device_name is the same as the node's device name
+ // without specifying the device type or ID.
+ DeviceNameUtils::ParsedName soft_device_name =
+ members_[node_root].device_name;
+ soft_device_name.type.clear();
+ soft_device_name.has_type = false;
+ soft_device_name.has_id = false;
+ device_set_->FindMatchingDevices(soft_device_name, &devices);
if (!devices.empty()) {
- // Filter devices into those that are compatible with the root
- // node (and its children).
devices = FilterSupportedDevices(
devices, members_[node_root].supported_device_types);
}
+ }
- // Perform soft placement if allow_soft_placement is set. options_
- // being NULL is treated as allowing soft placement.
- if (devices.empty() &&
- (options_ == nullptr || options_->config.allow_soft_placement())) {
- // The soft_device_name is the same as the node's device name
- // without specifying the device type or ID.
- DeviceNameUtils::ParsedName soft_device_name =
- members_[node_root].device_name;
- soft_device_name.type.clear();
- soft_device_name.has_type = false;
- soft_device_name.has_id = false;
- device_set_->FindMatchingDevices(soft_device_name, &devices);
- if (!devices.empty()) {
- devices = FilterSupportedDevices(
- devices, members_[node_root].supported_device_types);
- }
- }
-
- if (devices.empty()) {
- // Return an error when a physical device that matches an explicit
- // device specification is not found. This ensures that we don't
- // assign a node to GPU when the user wanted to force it on CPU.
- DeviceNameUtils::ParsedName specified_device_name;
- if (DeviceNameUtils::ParseFullName(node->def().device(),
- &specified_device_name) &&
- specified_device_name == members_[node_root].device_name) {
- // The specified device and merged set device match, and
- // will appear in the GraphDef (for debugging), so just
- // print the specified device.
- std::vector<Device*> devices_matching_nodedef;
- device_set_->FindMatchingDevices(specified_device_name,
- &devices_matching_nodedef);
- if (devices_matching_nodedef.empty()) {
- // Sometimes it is almost impossible to understand the problem
- // without a list of available devices.
- std::vector<string> device_names;
- for (const Device* device : device_set_->devices()) {
- device_names.push_back(device->name());
- }
- std::sort(device_names.begin(), device_names.end());
-
- return errors::InvalidArgument(
- "Could not satisfy explicit device specification '",
- node->def().device(),
- "' because no devices matching that specification "
- "are registered in this process; available devices: ",
- str_util::Join(device_names, ", "));
- } else if (specified_device_name.has_type) {
- return errors::InvalidArgument(
- "Could not satisfy explicit device specification '",
- node->def().device(), "' because no supported kernel for ",
- specified_device_name.type, " devices is available");
- } else {
- return errors::InvalidArgument(
- "Could not satisfy explicit device specification '",
- node->def().device());
+ if (devices.empty()) {
+ // Return an error when a physical device that matches an explicit
+ // device specification is not found. This ensures that we don't
+ // assign a node to GPU when the user wanted to force it on CPU.
+ DeviceNameUtils::ParsedName specified_device_name;
+ if (DeviceNameUtils::ParseFullName(node->def().device(),
+ &specified_device_name) &&
+ specified_device_name == members_[node_root].device_name) {
+ // The specified device and merged set device match, and
+ // will appear in the GraphDef (for debugging), so just
+ // print the specified device.
+ std::vector<Device*> devices_matching_nodedef;
+ device_set_->FindMatchingDevices(specified_device_name,
+ &devices_matching_nodedef);
+ if (devices_matching_nodedef.empty()) {
+ // Sometimes it is almost impossible to understand the problem
+ // without a list of available devices.
+ std::vector<string> device_names;
+ for (const Device* device : device_set_->devices()) {
+ device_names.push_back(device->name());
}
- } else {
- // The specified device may be a valid device but the
- // merged set device is different, so print both.
+ std::sort(device_names.begin(), device_names.end());
+
return errors::InvalidArgument(
"Could not satisfy explicit device specification '",
node->def().device(),
- "' because the node was colocated with a group of nodes that "
- "required incompatible device '",
- DeviceNameUtils::ParsedNameToString(
- members_[node_root].device_name),
- "'");
+ "' because no devices matching that specification "
+ "are registered in this process; available devices: ",
+ str_util::Join(device_names, ", "));
+ } else if (specified_device_name.has_type) {
+ return errors::InvalidArgument(
+ "Could not satisfy explicit device specification '",
+ node->def().device(), "' because no supported kernel for ",
+ specified_device_name.type, " devices is available");
+ } else {
+ return errors::InvalidArgument(
+ "Could not satisfy explicit device specification '",
+ node->def().device());
}
- }
- } else {
- // The device is completely unspecified, so enumerate the devices that
- // support all of the nodes in the set.
- if (device_set_->devices().empty()) {
- return errors::Internal("No devices are registered");
- }
- devices = FilterSupportedDevices(
- device_set_->devices(), members_[node_root].supported_device_types);
-
- if (devices.empty()) {
+ } else {
+ // The specified device may be a valid device but the
+ // merged set device is different, so print both.
return errors::InvalidArgument(
- "Node had no OpKernel registered to support this operation: ",
- "Operation was ", node->type_string(), " and inputs were ",
- DataTypeVectorString(node->input_types()));
+ "Could not satisfy explicit device specification '",
+ node->def().device(),
+ "' because the node was colocated with a group of nodes that "
+ "required incompatible device '",
+ DeviceNameUtils::ParsedNameToString(
+ members_[node_root].device_name),
+ "'");
}
}
+ } else {
+ // The device is completely unspecified, so enumerate the devices that
+ // support all of the nodes in the set.
+ if (device_set_->devices().empty()) {
+ return errors::Internal("No devices are registered");
+ }
+ devices = FilterSupportedDevices(
+ device_set_->devices(), members_[node_root].supported_device_types);
- // Returns the first device in sorted devices list so we will always
- // choose the same device.
- members_[node_root].assigned_device = devices[0];
- }
- node->set_assigned_device_name(members_[node_root].assigned_device->name());
-
- // Log placement if log_device_placement is set.
- if (options_ && options_->config.log_device_placement()) {
- printf("%s: %s\n", node->name().c_str(),
- node->assigned_device_name().c_str());
- LOG(INFO) << node->name() << ": " << node->assigned_device_name();
+ if (devices.empty()) {
+ return errors::InvalidArgument(
+ "Node had no OpKernel registered to support this operation: ",
+ "Operation was ", node->type_string(), " and inputs were ",
+ DataTypeVectorString(node->input_types()));
+ }
}
+ // Cache the result of the possible devices for this node group.
+ members_[node_root].possible_devices = devices;
+ *possible_devices = members_[node_root].possible_devices;
return Status::OK();
}
@@ -393,21 +389,25 @@ class ColocationGraph {
// The id of the node that is the parent of this one, or its own
// id if it is a root. parent <= 0 indicates that this member is invalid.
int parent = -1;
+
// A proxy for the depth of the tree that is used to prefer
// connecting smaller trees to larger trees when merging disjoint
// sets.
int rank = 0;
+
// The intersection of all device types supported by this node,
// and those of all of its children, in priority order
// of the preferred device.
DeviceTypeVector supported_device_types;
+
// The merged form of the device requested for this node, with
// those of all of its children.
DeviceNameUtils::ParsedName device_name;
- // If this node is a root, stores the Device to which this node
+
+ // If this node is a root, stores a list of Devices to which this node
// and all of its children have been assigned, or nullptr if this
- // has not yet been computed by GetAssignedDevice().
- Device* assigned_device = nullptr;
+ // has not yet been computed.
+ std::vector<Device*> possible_devices;
};
Status InitializeMember(const Node& node, Member* member) {
@@ -632,6 +632,7 @@ Status SimplePlacer::Run() {
// 3. For each node, assign a device based on the constraints in the
// disjoint node set.
+ std::vector<Device*> devices;
for (Node* node : graph_->nodes()) {
// Skip the source and sink nodes.
if (!node->IsOp()) {
@@ -642,14 +643,31 @@ Status SimplePlacer::Run() {
continue;
}
- status = colocation_graph.AssignDevice(node);
+ status = colocation_graph.GetDevicesForNode(node, &devices);
if (!status.ok()) {
return AttachDef(
errors::InvalidArgument("Cannot assign a device to node '",
node->name(), "': ", status.error_message()),
node->def());
}
+
+ // Returns the first device in sorted devices list so we will always
+ // choose the same device.
+ //
+ // TODO(vrv): Factor this assignment out into a pluggable algorithm,
+ // so that SimplePlacer is responsible for enforcing preconditions
+ // and we can experiment with other algorithms when given a choice of
+ // devices.
+ node->set_assigned_device_name(devices[0]->name());
+
+ // Log placement if log_device_placement is set.
+ if (options_ && options_->config.log_device_placement()) {
+ printf("%s: %s\n", node->name().c_str(),
+ node->assigned_device_name().c_str());
+ LOG(INFO) << node->name() << ": " << node->assigned_device_name();
+ }
}
+
return Status::OK();
}
diff --git a/tensorflow/core/common_runtime/simple_placer.h b/tensorflow/core/common_runtime/simple_placer.h
index 0fcaa31e6f..7bfd5a1311 100644
--- a/tensorflow/core/common_runtime/simple_placer.h
+++ b/tensorflow/core/common_runtime/simple_placer.h
@@ -43,7 +43,10 @@ namespace tensorflow {
// The implementation builds a constraint graph with the same set of
// nodes, and edges that represent colocation constraints between
// nodes. Each connected component in the resulting constraint graph
-// is then assigned to a single device.
+// is then assigned to a set of valid devices.
+//
+// Run() will finally assign the device to each node given the list of
+// possible devices.
//
// TODO(mrry): "Soft" constraints, such as "place node 'x' as close as
// possible to node 'y' while respecting the other constraints"?