diff options
author | 2017-03-20 12:11:05 -0800 | |
---|---|---|
committer | 2017-03-20 13:39:28 -0700 | |
commit | 3288f2eee7140e4a97c5976417fcbab5fe28a05c (patch) | |
tree | 1e1f18dbaf6fe63f566a064bbbc0ef30177a162d /tensorflow/core/debug | |
parent | a7e5032f4d5cb054d86e0c7f2b8aaab293b43d43 (diff) |
tfdbg core: add configurable attributes to debug ops, DebugNumericSummary
Added three attributes to the debug op "DebugNumericSummary" used in tfdbg-based TensorBoard health pills:
1) lower_bound (type: float)
2) upper_bound (type: float)
3) mute_if_healthy (type: bool)
lower_bound and upper_bound make it possible to customize thresholds beyond which tensor elements are counted as -inf or inf. mute_if_healthy makes it possible to mute a DebugNumericSummary op unless there are nan, -inf or inf elements in the watched tensor, which is useful for reducing the amount of health pill data.
Changes are made in the C++ DebugNodeInserter class, so that these attributes can be directly set from Python methods such as tf_debug.watch_graph() using the following syntax in the debug_ops argument:
debug_ops=["DebugNumericSummary(attribute_name=attribute_value)"]
e.g.,
debug_ops=["DebugNumericSummary(lower_bound=-100.0; mute_if_healthy=true)"]
Currently, string, float, int, and bool attribute value types are supported.
Change: 150665493
Diffstat (limited to 'tensorflow/core/debug')
-rw-r--r-- | tensorflow/core/debug/BUILD | 15 | ||||
-rw-r--r-- | tensorflow/core/debug/debug_graph_utils.cc | 148 | ||||
-rw-r--r-- | tensorflow/core/debug/debug_graph_utils.h | 15 | ||||
-rw-r--r-- | tensorflow/core/debug/debug_graph_utils_test.cc | 161 |
4 files changed, 326 insertions, 13 deletions
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index 4b13171c97..2035922fdc 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -207,6 +207,21 @@ tf_cc_test( ], ) +tf_cc_test( + name = "debug_graph_utils_test", + size = "small", + srcs = ["debug_graph_utils_test.cc"], + linkstatic = tf_kernel_tests_linkstatic(), + deps = [ + ":debug_graph_utils", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + # TODO(cais): Add the following back in when tfdbg is supported on Android. # filegroup( # name = "android_srcs", diff --git a/tensorflow/core/debug/debug_graph_utils.cc b/tensorflow/core/debug/debug_graph_utils.cc index 6ae5672860..b0982ec7d8 100644 --- a/tensorflow/core/debug/debug_graph_utils.cc +++ b/tensorflow/core/debug/debug_graph_utils.cc @@ -317,6 +317,120 @@ Status DebugNodeInserter::CreateCopyNode( } // static +Status DebugNodeInserter::ParseDebugOpName( + const string& debug_op_name, string* debug_op_name_proper, + std::unordered_map<string, string>* attributes) { + const size_t l_index = debug_op_name.find('('); + const size_t r_index = debug_op_name.find(')'); + if (l_index == string::npos && r_index == string::npos) { + *debug_op_name_proper = debug_op_name; + } else { + if (l_index == string::npos || l_index == 0 || + r_index != debug_op_name.size() - 1) { + return errors::InvalidArgument("Malformed debug op name \"", + debug_op_name, "\""); + } + + *debug_op_name_proper = debug_op_name.substr(0, l_index); + string arguments = debug_op_name.substr(l_index + 1, r_index - l_index - 1); + + std::vector<string> attribute_segs = str_util::Split(arguments, ";"); + for (const string& attribute_seg : attribute_segs) { + StringPiece seg(attribute_seg); + str_util::RemoveWhitespaceContext(&seg); + if (seg.empty()) { + continue; + } + + const size_t eq_index = seg.find('='); + if (eq_index == string::npos) 
{ + return errors::InvalidArgument( + "Malformed attributes in debug op name \"", debug_op_name, "\""); + } + + const string key = seg.substr(0, eq_index).ToString(); + const string value = + seg.substr(eq_index + 1, attribute_seg.size() - eq_index - 1) + .ToString(); + if (key.empty() || value.empty()) { + return errors::InvalidArgument( + "Malformed attributes in debug op name \"", debug_op_name, "\""); + } + + if (attributes->find(key) == attributes->end()) { + (*attributes)[key] = value; + } else { + return errors::InvalidArgument("Duplicate attribute name \"", key, + "\" found in the debug op: \"", + debug_op_name, "\""); + } + } + } + return Status::OK(); +} + +// static +Status DebugNodeInserter::SetDebugNodeAttributes( + Node* debug_node, const std::unordered_map<string, string>& attributes) { + std::unordered_set<string> unfulfilled_keys; + for (const auto& item : attributes) { + unfulfilled_keys.insert(item.first); + } + + for (const auto& attr : debug_node->op_def().attr()) { + if (attributes.find(attr.name()) != attributes.end()) { + const string& attr_value = attributes.at(attr.name()); + if (attr.type() == "string") { + debug_node->AddAttr<string>(attr.name(), attr_value); + } else if (attr.type() == "float") { + float float_value = 0.0; + if (!::tensorflow::strings::safe_strtof(attr_value.c_str(), + &float_value)) { + return errors::InvalidArgument( + "Invalid value string for float-type attribute ", attr.name(), + "of debug node ", debug_node->name(), ": \"", attr_value, "\""); + } + debug_node->AddAttr<float>(attr.name(), float_value); + } else if (attr.type() == "int") { + int64 int_value = 0; + if (!::tensorflow::strings::safe_strto64(attr_value, &int_value)) { + return errors::InvalidArgument( + "Invalid value string for int-type attribute ", attr.name(), + "of debug node ", debug_node->name(), ": \"", attr_value, "\""); + } + debug_node->AddAttr<int>(attr.name(), int_value); + } else if (attr.type() == "bool") { + string bool_str = 
str_util::Lowercase(attr_value); + if (bool_str == "false" || bool_str == "f" || bool_str == "0") { + debug_node->AddAttr<bool>(attr.name(), false); + } else if (bool_str == "true" || bool_str == "t" || bool_str == "1") { + debug_node->AddAttr<bool>(attr.name(), true); + } else { + return errors::InvalidArgument( + "Invalid value string for bool-type attribute ", attr.name(), + "of debug node ", debug_node->name(), ": \"", attr_value, "\""); + } + } else { + return errors::InvalidArgument( + "Unsupported type of custom attribute for debug ops: ", + attr.type()); + } + + unfulfilled_keys.erase(attr.name()); + } + } + + if (unfulfilled_keys.empty()) { + return Status::OK(); + } else { + return errors::InvalidArgument( + unfulfilled_keys.size(), + " attribute key(s) were not valid for debug node ", debug_node->name(), + ": ", str_util::Join(unfulfilled_keys, ", ")); + } +} + +// static Status DebugNodeInserter::CreateDebugNode( Graph* graph, const DeviceType device_type, const string& src_copy_node_name, const DataType src_dt, @@ -325,29 +439,37 @@ Status DebugNodeInserter::CreateDebugNode( NodeDef node_def; const KernelDef* kdef; + string debug_op_name_proper; + std::unordered_map<string, string> custom_attributes; + TF_RETURN_IF_ERROR(ParseDebugOpName(debug_op_name, &debug_op_name_proper, + &custom_attributes)); + const string debug_node_name = - GetDebugNodeName(tensor_name, debug_op_num, debug_op_name); - auto builder = NodeDefBuilder(debug_node_name, debug_op_name) + GetDebugNodeName(tensor_name, debug_op_num, debug_op_name_proper); + auto builder = NodeDefBuilder(debug_node_name, debug_op_name_proper) .Input(src_copy_node_name, 0, src_dt) .Attr("tensor_name", tensor_name) .Attr("debug_urls", debug_urls); if (!builder.Finalize(&node_def).ok()) { - return Status( - error::FAILED_PRECONDITION, - strings::StrCat("Failed to create node definition ", "for debug op ", - debug_op_name, " on watched tensor ", tensor_name)); + return errors::FailedPrecondition( + "Failed 
to create node definition for debug op ", debug_op_name_proper, + " on watched tensor ", tensor_name); } if (!FindKernelDef(device_type, node_def, &kdef, nullptr).ok()) { - return Status( - error::FAILED_PRECONDITION, - strings::StrCat("Failed to find kernel definition ", "for debug op ", - debug_op_name, " on watched tensor ", tensor_name)); + return errors::FailedPrecondition( + "Failed to find kernel definition for debug op ", debug_op_name_proper, + " on watched tensor ", tensor_name); } if (!NodeBuilder(builder).Finalize(graph, debug_node).ok()) { - return Status(error::FAILED_PRECONDITION, - strings::StrCat("Failed to create debug node ", debug_op_name, - " on watched tensor ", tensor_name)); + return errors::FailedPrecondition("Failed to create debug node ", + debug_op_name_proper, + " on watched tensor ", tensor_name); + } + + // Set custom attributes (if any). + if (!custom_attributes.empty()) { + TF_RETURN_IF_ERROR(SetDebugNodeAttributes(*debug_node, custom_attributes)); } return Status::OK(); diff --git a/tensorflow/core/debug/debug_graph_utils.h b/tensorflow/core/debug/debug_graph_utils.h index 6edd26c260..015149a64e 100644 --- a/tensorflow/core/debug/debug_graph_utils.h +++ b/tensorflow/core/debug/debug_graph_utils.h @@ -121,6 +121,19 @@ class DebugNodeInserter { const int src_output, const DataType src_dt, const string& tensor_name, Node** copy_node); + // Parse the debug_op_name string to extract proper op name and attributes. + // debug_op_name can be the proper op name only, e.g., "DebugNumericSummary". + // It can also contain customizable keys and values. Each key-value pair is + // connected with an equal sign ("="). Multiple key-value pairs are separated + // with semicolons (";"), with optional whitespace in between, e.g., + // "DebugNumericSummary(mute_if_healthy=true; lower_bound=-100.0)". 
+ static Status ParseDebugOpName( + const string& debug_op_name, string* debug_op_name_proper, + std::unordered_map<string, string>* attributes); + + static Status SetDebugNodeAttributes( + Node* debug_node, const std::unordered_map<string, string>& attributes); + static Status CreateDebugNode(Graph* graph, const DeviceType device_type, const string& src_copy_node_name, const DataType src_dt, @@ -128,6 +141,8 @@ class DebugNodeInserter { const std::vector<string>& debug_urls, const int debug_op_num, const string& debug_op_name, Node** debug_node); + + friend class DebugGraphUtilsTest; }; } // namespace tensorflow diff --git a/tensorflow/core/debug/debug_graph_utils_test.cc b/tensorflow/core/debug/debug_graph_utils_test.cc new file mode 100644 index 0000000000..b3305e84a0 --- /dev/null +++ b/tensorflow/core/debug/debug_graph_utils_test.cc @@ -0,0 +1,161 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/debug/debug_graph_utils.h" + +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/str_util.h" + +namespace tensorflow { + +class DebugGraphUtilsTest : public ::testing::Test { + protected: + Status ParseDebugOpName(const string& debug_op_name, + string* debug_op_name_proper, + std::unordered_map<string, string>* attributes) { + return DebugNodeInserter::ParseDebugOpName( + debug_op_name, debug_op_name_proper, attributes); + } +}; + +TEST_F(DebugGraphUtilsTest, TestParseNoAttributeDebugOpName) { + string debug_op_name_proper; + std::unordered_map<string, string> attributes; + TF_ASSERT_OK( + ParseDebugOpName("DebugIdentity", &debug_op_name_proper, &attributes)); + ASSERT_EQ("DebugIdentity", debug_op_name_proper); + ASSERT_EQ(0, attributes.size()); +} + +TEST_F(DebugGraphUtilsTest, TestMalformedDebugOpName) { + string debug_op_name_proper; + std::unordered_map<string, string> attributes; + + Status s = ParseDebugOpName("(mute_if_healthy=true)", &debug_op_name_proper, + &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); + + s = ParseDebugOpName("DebugNumericSummary(", &debug_op_name_proper, + &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); + + s = ParseDebugOpName("DebugNumericSummary)", &debug_op_name_proper, + &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); +} + +TEST_F(DebugGraphUtilsTest, TestDebugOpNameWithMalformedAttributes) { + string debug_op_name_proper; + std::unordered_map<string, string> attributes; + + Status s = ParseDebugOpName("DebugNumericSummary(=)", &debug_op_name_proper, + &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); + + s = ParseDebugOpName("DebugNumericSummary(mute_if_healthy=)", + &debug_op_name_proper, &attributes); + 
ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); + + s = ParseDebugOpName("DebugNumericSummary(=true)", &debug_op_name_proper, + &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); + + s = ParseDebugOpName("DebugNumericSummary(mute_if_healthy:true)", + &debug_op_name_proper, &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); + + s = ParseDebugOpName("DebugNumericSummary(mute_if_healthy=true;threshold=)", + &debug_op_name_proper, &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); + + s = ParseDebugOpName( + "DebugNumericSummary(mute_if_healthy=true;threshold:300.0)", + &debug_op_name_proper, &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); +} + +TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithSingleAttribute) { + string debug_op_name_proper; + std::unordered_map<string, string> attributes; + + TF_ASSERT_OK(ParseDebugOpName("DebugNumericSummary()", &debug_op_name_proper, + &attributes)); + ASSERT_EQ("DebugNumericSummary", debug_op_name_proper); + ASSERT_EQ(0, attributes.size()); + + attributes.clear(); + TF_ASSERT_OK(ParseDebugOpName("DebugNumericSummary(mute_if_healthy=true)", + &debug_op_name_proper, &attributes)); + ASSERT_EQ("DebugNumericSummary", debug_op_name_proper); + ASSERT_EQ(1, attributes.size()); + ASSERT_EQ("true", attributes["mute_if_healthy"]); +} + +TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithMoreThanOneAttributes) { + string debug_op_name_proper; + std::unordered_map<string, string> attributes; + TF_ASSERT_OK(ParseDebugOpName( + "DebugNumericSummary(mute_if_healthy=true; threshold=300.0)", + &debug_op_name_proper, &attributes)); + ASSERT_EQ("DebugNumericSummary", debug_op_name_proper); + ASSERT_EQ(2, attributes.size()); + ASSERT_EQ("true", attributes["mute_if_healthy"]); + ASSERT_EQ("300.0", attributes["threshold"]); + + attributes.clear(); + TF_ASSERT_OK(ParseDebugOpName( + "DebugNumericSummary(mute_if_healthy=true;threshold=300.0;first_n=100)", + 
&debug_op_name_proper, &attributes)); + ASSERT_EQ("DebugNumericSummary", debug_op_name_proper); + ASSERT_EQ(3, attributes.size()); + ASSERT_EQ("true", attributes["mute_if_healthy"]); + ASSERT_EQ("300.0", attributes["threshold"]); + ASSERT_EQ("100", attributes["first_n"]); +} + +TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithMoreDuplicatettributes) { + string debug_op_name_proper; + std::unordered_map<string, string> attributes; + Status s = ParseDebugOpName( + "DebugNumericSummary(mute_if_healthy=true; lower_bound=3; " + "mute_if_healthy=false;)", + &debug_op_name_proper, &attributes); + ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code()); +} + +TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithWhitespaceInAttributes) { + string debug_op_name_proper; + std::unordered_map<string, string> attributes; + + TF_ASSERT_OK(ParseDebugOpName( + "DebugNumericSummary( mute_if_healthy=true; threshold=300.0 )", + &debug_op_name_proper, &attributes)); + ASSERT_EQ("DebugNumericSummary", debug_op_name_proper); + ASSERT_EQ(2, attributes.size()); + ASSERT_EQ("true", attributes["mute_if_healthy"]); + ASSERT_EQ("300.0", attributes["threshold"]); + + attributes.clear(); + TF_ASSERT_OK(ParseDebugOpName( + "DebugNumericSummary(;;mute_if_healthy=true; threshold=300.0;;)", + &debug_op_name_proper, &attributes)); + ASSERT_EQ("DebugNumericSummary", debug_op_name_proper); + ASSERT_EQ(2, attributes.size()); + ASSERT_EQ("true", attributes["mute_if_healthy"]); + ASSERT_EQ("300.0", attributes["threshold"]); +} + +} // namespace tensorflow |