From 0e14f11516d5aa4350f3f101ca6e60a3c9db9744 Mon Sep 17 00:00:00 2001 From: James Keeling Date: Mon, 16 Jul 2018 11:01:38 -0700 Subject: Add additional Python info into two error messages. If the experimental option is enabled, this will insert Python format information into two error messages in placer.cc. The error messages are emitted when Placer fails to assign a device for an operation. The additional info will cause the Python layer to output where the node was defined in Python. PiperOrigin-RevId: 204770562 --- tensorflow/core/BUILD | 1 + tensorflow/core/common_runtime/placer.cc | 30 +++++++++++++----- tensorflow/core/common_runtime/placer.h | 1 + tensorflow/core/common_runtime/placer_test.cc | 44 +++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 8a43220ec5..514713bb96 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -846,6 +846,7 @@ tf_cuda_library( "util/sparse/sparse_tensor.h", "util/stat_summarizer.h", "util/stat_summarizer_options.h", + "util/status_util.h", "util/stream_executor_util.h", "util/strided_slice_op.h", "util/tensor_format.h", diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc index 0be44662dd..6781c87f6c 100644 --- a/tensorflow/core/common_runtime/placer.cc +++ b/tensorflow/core/common_runtime/placer.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/util/status_util.h" namespace tensorflow { @@ -822,10 +823,10 @@ Status Placer::Run() { std::vector* devices; Status status = colocation_graph.GetDevicesForNode(node, &devices); if (!status.ok()) { - return AttachDef( - errors::InvalidArgument("Cannot assign a device for operation '", - node->name(), "': ", status.error_message()), - *node); + return AttachDef(errors::InvalidArgument( + "Cannot assign a device for operation ", + RichNodeName(node), ": ", status.error_message()), + *node); } // Returns the first device in sorted devices list so we will always @@ -869,10 +870,10 @@ Status Placer::Run() { std::vector* devices; Status status = colocation_graph.GetDevicesForNode(node, &devices); if (!status.ok()) { - return AttachDef( - errors::InvalidArgument("Cannot assign a device for operation '", - node->name(), "': ", status.error_message()), - *node); + return AttachDef(errors::InvalidArgument( + "Cannot assign a device for operation ", + RichNodeName(node), ": ", status.error_message()), + *node); } int assigned_device = -1; @@ -943,4 +944,17 @@ bool Placer::ClientHandlesErrorFormatting() const { options_->config.experimental().client_handles_error_formatting(); } +// Returns the node name in single quotes. If the client handles formatted +// errors, appends a formatting tag which the client will reformat into, for +// example, " (defined at filename:123)". +string Placer::RichNodeName(const Node* node) const { + string quoted_name = strings::StrCat("'", node->name(), "'"); + if (ClientHandlesErrorFormatting()) { + string file_and_line = error_format_tag(*node, "${file}:${line}"); + return strings::StrCat(quoted_name, " (defined at ", file_and_line, ")"); + } else { + return quoted_name; + } +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/placer.h b/tensorflow/core/common_runtime/placer.h index 1f8b450103..fce87269c5 100644 --- a/tensorflow/core/common_runtime/placer.h +++ b/tensorflow/core/common_runtime/placer.h @@ -88,6 +88,7 @@ class Placer { void AssignAndLog(int assigned_device, Node* node) const; void LogDeviceAssignment(const Node* node) const; bool ClientHandlesErrorFormatting() const; + string RichNodeName(const Node* node) const; Graph* const graph_; // Not owned. const DeviceSet* const devices_; // Not owned. diff --git a/tensorflow/core/common_runtime/placer_test.cc b/tensorflow/core/common_runtime/placer_test.cc index 07a7724f16..cede899842 100644 --- a/tensorflow/core/common_runtime/placer_test.cc +++ b/tensorflow/core/common_runtime/placer_test.cc @@ -1142,6 +1142,50 @@ TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacement) { EXPECT_TRUE(str_util::StrContains(s.error_message(), "/device:fakegpu:11")); } +// Test that the "Cannot assign a device" error message contains a format tag +// when requested. +TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacementFormatTag) { + Graph g(OpRegistry::Global()); + { // Scope for temporary variables used to construct g. + GraphDefBuilder b(GraphDefBuilder::kFailImmediately); + ops::SourceOp("TestDevice", + b.opts().WithName("in").WithDevice("/device:fakegpu:11")); + TF_EXPECT_OK(BuildGraph(b, &g)); + } + + SessionOptions options; + options.config.mutable_experimental()->set_client_handles_error_formatting( + true); + Status s = Place(&g, &options); + EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); + EXPECT_TRUE( + str_util::StrContains(s.error_message(), + "Cannot assign a device for operation 'in'" + " (defined at ^^node:in:${file}:${line}^^)")); +} + +// Test that the "Cannot assign a device" error message does not contain a +// format tag when not it shouldn't +TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacementNoFormatTag) { + Graph g(OpRegistry::Global()); + { // Scope for temporary variables used to construct g. + GraphDefBuilder b(GraphDefBuilder::kFailImmediately); + ops::SourceOp("TestDevice", + b.opts().WithName("in").WithDevice("/device:fakegpu:11")); + TF_EXPECT_OK(BuildGraph(b, &g)); + } + + SessionOptions options; + options.config.mutable_experimental()->set_client_handles_error_formatting( + false); + Status s = Place(&g, &options); + EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); + EXPECT_TRUE(str_util::StrContains( + s.error_message(), "Cannot assign a device for operation 'in'")); + EXPECT_FALSE(str_util::StrContains( + s.error_message(), "'in' (defined at ^^node:in:${file}:${line}^^)")); +} + // Test that placement fails when a node requests an explicit device that is not // supported by the registered kernels if allow_soft_placement is no set. TEST_F(PlacerTest, TestUnsupportedDeviceNoAllowSoftPlacement) { -- cgit v1.2.3