aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar Shanqing Cai <cais@google.com>2017-03-20 12:11:05 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-03-20 13:39:28 -0700
commit3288f2eee7140e4a97c5976417fcbab5fe28a05c (patch)
tree1e1f18dbaf6fe63f566a064bbbc0ef30177a162d /tensorflow
parenta7e5032f4d5cb054d86e0c7f2b8aaab293b43d43 (diff)
tfdbg core: add configurable attributes to debug ops, DebugNumericSummary
Added three attributes to the debug op "DebugNumericSummary" used in tfdbg-based TensorBoard health pills: 1) lower_bound (type: float) 2) upper_bound (type: float) 3) mute_if_healthy (type: bool) lower_bound and upper_bound make it possible to customize thresholds beyond which tensor elements are counted as -inf or inf. mute_if_healthy makes it possible to mute a DebugNumericSummary op unless there are nan, -inf or inf elements in the watched tensor, which is useful for reducing the amount of health pill data. Changes are made in the C++ DebugNodeInserter class, so that these attributes can be directly set from Python methods such as tf_debug.watch_graph() using the following syntax in the debug_ops argument: debug_ops=["DebugNumericSummary(attribute_name=attribute_value)"] e.g., debug_ops=["DebugNumericSummary(lower_bound=-100.0; mute_if_healthy=true)"] Currently, string, float, int, and bool attribute value types are supported. Change: 150665493
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/core/debug/BUILD15
-rw-r--r--tensorflow/core/debug/debug_graph_utils.cc148
-rw-r--r--tensorflow/core/debug/debug_graph_utils.h15
-rw-r--r--tensorflow/core/debug/debug_graph_utils_test.cc161
-rw-r--r--tensorflow/core/kernels/debug_ops.h20
-rw-r--r--tensorflow/core/kernels/debug_ops_test.cc87
-rw-r--r--tensorflow/core/ops/debug_ops.cc31
-rw-r--r--tensorflow/python/debug/lib/debug_utils.py7
-rw-r--r--tensorflow/python/debug/lib/session_debug_testlib.py133
9 files changed, 594 insertions, 23 deletions
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 4b13171c97..2035922fdc 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -207,6 +207,21 @@ tf_cc_test(
],
)
+tf_cc_test(
+ name = "debug_graph_utils_test",
+ size = "small",
+ srcs = ["debug_graph_utils_test.cc"],
+ linkstatic = tf_kernel_tests_linkstatic(),
+ deps = [
+ ":debug_graph_utils",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ ],
+)
+
# TODO(cais): Add the following back in when tfdbg is supported on Android.
# filegroup(
# name = "android_srcs",
diff --git a/tensorflow/core/debug/debug_graph_utils.cc b/tensorflow/core/debug/debug_graph_utils.cc
index 6ae5672860..b0982ec7d8 100644
--- a/tensorflow/core/debug/debug_graph_utils.cc
+++ b/tensorflow/core/debug/debug_graph_utils.cc
@@ -317,6 +317,120 @@ Status DebugNodeInserter::CreateCopyNode(
}
// static
+Status DebugNodeInserter::ParseDebugOpName(
+ const string& debug_op_name, string* debug_op_name_proper,
+ std::unordered_map<string, string>* attributes) {
+ const size_t l_index = debug_op_name.find('(');
+ const size_t r_index = debug_op_name.find(')');
+ if (l_index == string::npos && r_index == string::npos) {
+ *debug_op_name_proper = debug_op_name;
+ } else {
+ if (l_index == string::npos || l_index == 0 ||
+ r_index != debug_op_name.size() - 1) {
+ return errors::InvalidArgument("Malformed debug op name \"",
+ debug_op_name, "\"");
+ }
+
+ *debug_op_name_proper = debug_op_name.substr(0, l_index);
+ string arguments = debug_op_name.substr(l_index + 1, r_index - l_index - 1);
+
+ std::vector<string> attribute_segs = str_util::Split(arguments, ";");
+ for (const string& attribute_seg : attribute_segs) {
+ StringPiece seg(attribute_seg);
+ str_util::RemoveWhitespaceContext(&seg);
+ if (seg.empty()) {
+ continue;
+ }
+
+ const size_t eq_index = seg.find('=');
+ if (eq_index == string::npos) {
+ return errors::InvalidArgument(
+ "Malformed attributes in debug op name \"", debug_op_name, "\"");
+ }
+
+ const string key = seg.substr(0, eq_index).ToString();
+ const string value =
+ seg.substr(eq_index + 1, attribute_seg.size() - eq_index - 1)
+ .ToString();
+ if (key.empty() || value.empty()) {
+ return errors::InvalidArgument(
+ "Malformed attributes in debug op name \"", debug_op_name, "\"");
+ }
+
+ if (attributes->find(key) == attributes->end()) {
+ (*attributes)[key] = value;
+ } else {
+ return errors::InvalidArgument("Duplicate attribute name \"", key,
+ "\" found in the debug op: \"",
+ debug_op_name, "\"");
+ }
+ }
+ }
+ return Status::OK();
+}
+
+// static
+Status DebugNodeInserter::SetDebugNodeAttributes(
+ Node* debug_node, const std::unordered_map<string, string>& attributes) {
+ std::unordered_set<string> unfulfilled_keys;
+ for (const auto& item : attributes) {
+ unfulfilled_keys.insert(item.first);
+ }
+
+ for (const auto& attr : debug_node->op_def().attr()) {
+ if (attributes.find(attr.name()) != attributes.end()) {
+ const string& attr_value = attributes.at(attr.name());
+ if (attr.type() == "string") {
+ debug_node->AddAttr<string>(attr.name(), attr_value);
+ } else if (attr.type() == "float") {
+ float float_value = 0.0;
+ if (!::tensorflow::strings::safe_strtof(attr_value.c_str(),
+ &float_value)) {
+ return errors::InvalidArgument(
+ "Invalid value string for float-type attribute ", attr.name(),
+ "of debug node ", debug_node->name(), ": \"", attr_value, "\"");
+ }
+ debug_node->AddAttr<float>(attr.name(), float_value);
+ } else if (attr.type() == "int") {
+ int64 int_value = 0;
+ if (!::tensorflow::strings::safe_strto64(attr_value, &int_value)) {
+ return errors::InvalidArgument(
+ "Invalid value string for int-type attribute ", attr.name(),
+ "of debug node ", debug_node->name(), ": \"", attr_value, "\"");
+ }
+ debug_node->AddAttr<int>(attr.name(), int_value);
+ } else if (attr.type() == "bool") {
+ string bool_str = str_util::Lowercase(attr_value);
+ if (bool_str == "false" || bool_str == "f" || bool_str == "0") {
+ debug_node->AddAttr<bool>(attr.name(), false);
+ } else if (bool_str == "true" || bool_str == "t" || bool_str == "1") {
+ debug_node->AddAttr<bool>(attr.name(), true);
+ } else {
+ return errors::InvalidArgument(
+ "Invalid value string for bool-type attribute ", attr.name(),
+ "of debug node ", debug_node->name(), ": \"", attr_value, "\"");
+ }
+ } else {
+ return errors::InvalidArgument(
+ "Unsupported type of custom attribute for debug ops: ",
+ attr.type());
+ }
+
+ unfulfilled_keys.erase(attr.name());
+ }
+ }
+
+ if (unfulfilled_keys.empty()) {
+ return Status::OK();
+ } else {
+ return errors::InvalidArgument(
+ unfulfilled_keys.size(),
+ " attribute key(s) were not valid for debug node ", debug_node->name(),
+ ": ", str_util::Join(unfulfilled_keys, ", "));
+ }
+}
+
+// static
Status DebugNodeInserter::CreateDebugNode(
Graph* graph, const DeviceType device_type,
const string& src_copy_node_name, const DataType src_dt,
@@ -325,29 +439,37 @@ Status DebugNodeInserter::CreateDebugNode(
NodeDef node_def;
const KernelDef* kdef;
+ string debug_op_name_proper;
+ std::unordered_map<string, string> custom_attributes;
+ TF_RETURN_IF_ERROR(ParseDebugOpName(debug_op_name, &debug_op_name_proper,
+ &custom_attributes));
+
const string debug_node_name =
- GetDebugNodeName(tensor_name, debug_op_num, debug_op_name);
- auto builder = NodeDefBuilder(debug_node_name, debug_op_name)
+ GetDebugNodeName(tensor_name, debug_op_num, debug_op_name_proper);
+ auto builder = NodeDefBuilder(debug_node_name, debug_op_name_proper)
.Input(src_copy_node_name, 0, src_dt)
.Attr("tensor_name", tensor_name)
.Attr("debug_urls", debug_urls);
if (!builder.Finalize(&node_def).ok()) {
- return Status(
- error::FAILED_PRECONDITION,
- strings::StrCat("Failed to create node definition ", "for debug op ",
- debug_op_name, " on watched tensor ", tensor_name));
+ return errors::FailedPrecondition(
+ "Failed to create node definition for debug op ", debug_op_name_proper,
+ " on watched tensor ", tensor_name);
}
if (!FindKernelDef(device_type, node_def, &kdef, nullptr).ok()) {
- return Status(
- error::FAILED_PRECONDITION,
- strings::StrCat("Failed to find kernel definition ", "for debug op ",
- debug_op_name, " on watched tensor ", tensor_name));
+ return errors::FailedPrecondition(
+ "Failed to find kernel definition for debug op ", debug_op_name_proper,
+ " on watched tensor ", tensor_name);
}
if (!NodeBuilder(builder).Finalize(graph, debug_node).ok()) {
- return Status(error::FAILED_PRECONDITION,
- strings::StrCat("Failed to create debug node ", debug_op_name,
- " on watched tensor ", tensor_name));
+ return errors::FailedPrecondition("Failed to create debug node ",
+ debug_op_name_proper,
+ " on watched tensor ", tensor_name);
+ }
+
+ // Set custom attributes (if any).
+ if (!custom_attributes.empty()) {
+ TF_RETURN_IF_ERROR(SetDebugNodeAttributes(*debug_node, custom_attributes));
}
return Status::OK();
diff --git a/tensorflow/core/debug/debug_graph_utils.h b/tensorflow/core/debug/debug_graph_utils.h
index 6edd26c260..015149a64e 100644
--- a/tensorflow/core/debug/debug_graph_utils.h
+++ b/tensorflow/core/debug/debug_graph_utils.h
@@ -121,6 +121,19 @@ class DebugNodeInserter {
const int src_output, const DataType src_dt,
const string& tensor_name, Node** copy_node);
+ // Parse the debug_op_name string to extract proper op name and attributes.
+ // debug_op_name can be the proper op name only, e.g., "DebugNumericSummary".
+ // It can also contain customizable keys and values. Each key-value pair is
+ // connected with an equal sign ("="). Multiple key-value pairs are separated
+ // with semicolons (";"), with optional whitespace in between, e.g.,
+ // "DebugNumericSummary(mute_if_healthy=true; lower_bound=-100.0)".
+ static Status ParseDebugOpName(
+ const string& debug_op_name, string* debug_op_name_proper,
+ std::unordered_map<string, string>* attributes);
+
+ static Status SetDebugNodeAttributes(
+ Node* debug_node, const std::unordered_map<string, string>& attributes);
+
static Status CreateDebugNode(Graph* graph, const DeviceType device_type,
const string& src_copy_node_name,
const DataType src_dt,
@@ -128,6 +141,8 @@ class DebugNodeInserter {
const std::vector<string>& debug_urls,
const int debug_op_num,
const string& debug_op_name, Node** debug_node);
+
+ friend class DebugGraphUtilsTest;
};
} // namespace tensorflow
diff --git a/tensorflow/core/debug/debug_graph_utils_test.cc b/tensorflow/core/debug/debug_graph_utils_test.cc
new file mode 100644
index 0000000000..b3305e84a0
--- /dev/null
+++ b/tensorflow/core/debug/debug_graph_utils_test.cc
@@ -0,0 +1,161 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/debug/debug_graph_utils.h"
+
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace tensorflow {
+
+class DebugGraphUtilsTest : public ::testing::Test {
+ protected:
+ Status ParseDebugOpName(const string& debug_op_name,
+ string* debug_op_name_proper,
+ std::unordered_map<string, string>* attributes) {
+ return DebugNodeInserter::ParseDebugOpName(
+ debug_op_name, debug_op_name_proper, attributes);
+ }
+};
+
+TEST_F(DebugGraphUtilsTest, TestParseNoAttributeDebugOpName) {
+ string debug_op_name_proper;
+ std::unordered_map<string, string> attributes;
+ TF_ASSERT_OK(
+ ParseDebugOpName("DebugIdentity", &debug_op_name_proper, &attributes));
+ ASSERT_EQ("DebugIdentity", debug_op_name_proper);
+ ASSERT_EQ(0, attributes.size());
+}
+
+TEST_F(DebugGraphUtilsTest, TestMalformedDebugOpName) {
+ string debug_op_name_proper;
+ std::unordered_map<string, string> attributes;
+
+ Status s = ParseDebugOpName("(mute_if_healthy=true)", &debug_op_name_proper,
+ &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+
+ s = ParseDebugOpName("DebugNumericSummary(", &debug_op_name_proper,
+ &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+
+ s = ParseDebugOpName("DebugNumericSummary)", &debug_op_name_proper,
+ &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+}
+
+TEST_F(DebugGraphUtilsTest, TestDebugOpNameWithMalformedAttributes) {
+ string debug_op_name_proper;
+ std::unordered_map<string, string> attributes;
+
+ Status s = ParseDebugOpName("DebugNumericSummary(=)", &debug_op_name_proper,
+ &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+
+ s = ParseDebugOpName("DebugNumericSummary(mute_if_healthy=)",
+ &debug_op_name_proper, &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+
+ s = ParseDebugOpName("DebugNumericSummary(=true)", &debug_op_name_proper,
+ &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+
+ s = ParseDebugOpName("DebugNumericSummary(mute_if_healthy:true)",
+ &debug_op_name_proper, &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+
+ s = ParseDebugOpName("DebugNumericSummary(mute_if_healthy=true;threshold=)",
+ &debug_op_name_proper, &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+
+ s = ParseDebugOpName(
+ "DebugNumericSummary(mute_if_healthy=true;threshold:300.0)",
+ &debug_op_name_proper, &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+}
+
+TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithSingleAttribute) {
+ string debug_op_name_proper;
+ std::unordered_map<string, string> attributes;
+
+ TF_ASSERT_OK(ParseDebugOpName("DebugNumericSummary()", &debug_op_name_proper,
+ &attributes));
+ ASSERT_EQ("DebugNumericSummary", debug_op_name_proper);
+ ASSERT_EQ(0, attributes.size());
+
+ attributes.clear();
+ TF_ASSERT_OK(ParseDebugOpName("DebugNumericSummary(mute_if_healthy=true)",
+ &debug_op_name_proper, &attributes));
+ ASSERT_EQ("DebugNumericSummary", debug_op_name_proper);
+ ASSERT_EQ(1, attributes.size());
+ ASSERT_EQ("true", attributes["mute_if_healthy"]);
+}
+
+TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithMoreThanOneAttributes) {
+ string debug_op_name_proper;
+ std::unordered_map<string, string> attributes;
+ TF_ASSERT_OK(ParseDebugOpName(
+ "DebugNumericSummary(mute_if_healthy=true; threshold=300.0)",
+ &debug_op_name_proper, &attributes));
+ ASSERT_EQ("DebugNumericSummary", debug_op_name_proper);
+ ASSERT_EQ(2, attributes.size());
+ ASSERT_EQ("true", attributes["mute_if_healthy"]);
+ ASSERT_EQ("300.0", attributes["threshold"]);
+
+ attributes.clear();
+ TF_ASSERT_OK(ParseDebugOpName(
+ "DebugNumericSummary(mute_if_healthy=true;threshold=300.0;first_n=100)",
+ &debug_op_name_proper, &attributes));
+ ASSERT_EQ("DebugNumericSummary", debug_op_name_proper);
+ ASSERT_EQ(3, attributes.size());
+ ASSERT_EQ("true", attributes["mute_if_healthy"]);
+ ASSERT_EQ("300.0", attributes["threshold"]);
+ ASSERT_EQ("100", attributes["first_n"]);
+}
+
+TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithMoreDuplicatettributes) {
+ string debug_op_name_proper;
+ std::unordered_map<string, string> attributes;
+ Status s = ParseDebugOpName(
+ "DebugNumericSummary(mute_if_healthy=true; lower_bound=3; "
+ "mute_if_healthy=false;)",
+ &debug_op_name_proper, &attributes);
+ ASSERT_EQ(errors::Code::INVALID_ARGUMENT, s.code());
+}
+
+TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithWhitespaceInAttributes) {
+ string debug_op_name_proper;
+ std::unordered_map<string, string> attributes;
+
+ TF_ASSERT_OK(ParseDebugOpName(
+ "DebugNumericSummary( mute_if_healthy=true; threshold=300.0 )",
+ &debug_op_name_proper, &attributes));
+ ASSERT_EQ("DebugNumericSummary", debug_op_name_proper);
+ ASSERT_EQ(2, attributes.size());
+ ASSERT_EQ("true", attributes["mute_if_healthy"]);
+ ASSERT_EQ("300.0", attributes["threshold"]);
+
+ attributes.clear();
+ TF_ASSERT_OK(ParseDebugOpName(
+ "DebugNumericSummary(;;mute_if_healthy=true; threshold=300.0;;)",
+ &debug_op_name_proper, &attributes));
+ ASSERT_EQ("DebugNumericSummary", debug_op_name_proper);
+ ASSERT_EQ(2, attributes.size());
+ ASSERT_EQ("true", attributes["mute_if_healthy"]);
+ ASSERT_EQ("300.0", attributes["threshold"]);
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/kernels/debug_ops.h b/tensorflow/core/kernels/debug_ops.h
index bd42997732..5437bc5a33 100644
--- a/tensorflow/core/kernels/debug_ops.h
+++ b/tensorflow/core/kernels/debug_ops.h
@@ -168,6 +168,10 @@ class DebugNumericSummaryOp : public OpKernel {
: OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("tensor_name", &tensor_name_));
OP_REQUIRES_OK(context, context->GetAttr("debug_urls", &debug_urls_));
+ OP_REQUIRES_OK(context, context->GetAttr("lower_bound", &lower_bound_));
+ OP_REQUIRES_OK(context, context->GetAttr("upper_bound", &upper_bound_));
+ OP_REQUIRES_OK(context,
+ context->GetAttr("mute_if_healthy", &mute_if_healthy_));
}
void Compute(OpKernelContext* context) override {
@@ -196,6 +200,9 @@ class DebugNumericSummaryOp : public OpKernel {
const T* input_flat = input.template flat<T>().data();
element_count = input_shape.num_elements();
+ const bool is_lower_bound_custom = !Eigen::numext::isinf(lower_bound_);
+ const bool is_upper_bound_custom = !Eigen::numext::isinf(upper_bound_);
+
for (int64 i = 0; i < element_count; ++i) {
const double x = static_cast<double>(input_flat[i]);
if (Eigen::numext::isnan(x)) {
@@ -207,7 +214,11 @@ class DebugNumericSummaryOp : public OpKernel {
positive_inf_count++;
}
} else {
- if (x < 0.0) {
+ if (is_lower_bound_custom && x <= lower_bound_) {
+ negative_inf_count++;
+ } else if (is_upper_bound_custom && x >= upper_bound_) {
+ positive_inf_count++;
+ } else if (x < 0.0) {
negative_count++;
} else if (x > 0.0) {
positive_count++;
@@ -259,7 +270,9 @@ class DebugNumericSummaryOp : public OpKernel {
output_tensor->vec<double>()(10) = mean;
output_tensor->vec<double>()(11) = variance;
- if (!debug_urls_.empty()) {
+ bool mute = mute_if_healthy_ && nan_count == 0 && negative_inf_count == 0 &&
+ positive_inf_count == 0;
+ if (!mute && !debug_urls_.empty()) {
// TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
DebugIO::PublishDebugTensor(tensor_name_, "DebugNumericSummary",
*output_tensor, Env::Default()->NowMicros(),
@@ -273,6 +286,9 @@ class DebugNumericSummaryOp : public OpKernel {
private:
string tensor_name_;
std::vector<string> debug_urls_;
+ float lower_bound_;
+ float upper_bound_;
+ bool mute_if_healthy_;
};
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/debug_ops_test.cc b/tensorflow/core/kernels/debug_ops_test.cc
index 152afc12a1..917d4c5299 100644
--- a/tensorflow/core/kernels/debug_ops_test.cc
+++ b/tensorflow/core/kernels/debug_ops_test.cc
@@ -485,5 +485,92 @@ TEST_F(DebugNumericSummaryOpTest, BoolSuccess) {
test::ExpectTensorNear<double>(expected, *GetOutput(0), 1e-8);
}
+// Tests for DebugNumericSummaryOp with a custom lower_bound attribute.
+class DebugNumericSummaryOpCustomLowerBoundTest : public OpsTestBase {
+ protected:
+ Status Init(DataType input_type) {
+ TF_CHECK_OK(NodeDefBuilder("op", "DebugNumericSummary")
+ .Input(FakeInput(input_type))
+ .Attr("tensor_name", "FakeTensor:0")
+ .Attr("lower_bound", -1.2f)
+ .Finalize(node_def()));
+ return InitOp();
+ }
+};
+
+TEST_F(DebugNumericSummaryOpCustomLowerBoundTest, Float_full_house) {
+ TF_ASSERT_OK(Init(DT_FLOAT));
+ AddInputFromArray<float>(
+ TensorShape({18}),
+ {std::numeric_limits<float>::quiet_NaN(),
+ std::numeric_limits<float>::quiet_NaN(), 0.0f, 0.0f, 0.0f, -1.0f, -3.0f,
+ 3.0f, 7.0f, -std::numeric_limits<float>::infinity(),
+ -std::numeric_limits<float>::infinity(),
+ std::numeric_limits<float>::infinity(),
+ std::numeric_limits<float>::infinity(),
+ std::numeric_limits<float>::infinity(),
+ std::numeric_limits<float>::infinity(),
+ std::numeric_limits<float>::infinity(),
+ std::numeric_limits<float>::quiet_NaN(),
+ std::numeric_limits<float>::quiet_NaN()});
+ TF_ASSERT_OK(RunOpKernel());
+
+ Tensor expected(allocator(), DT_DOUBLE, TensorShape({12}));
+ test::FillValues<double>(
+ &expected,
+ {1.0, // Is initialized.
+ 18.0, // Total element count.
+ 4.0, // nan count.
+ 3.0, // -inf count.
+ 1.0, // negative number count (excluding -inf).
+ 3.0, // zero count.
+ 2.0, // positive number count (excluding +inf).
+ 5.0, // +inf count.
+ -3.0, // minimum of non-inf and non-nan elements.
+ 7.0, // maximum of non-inf and non-nan elements.
+ 0.85714285714, // mean of non-inf and non-nan elements.
+ 8.97959183673}); // variance of non-inf and non-nan elements.
+
+ test::ExpectTensorNear<double>(expected, *GetOutput(0), 1e-8);
+}
+
+// Tests for DebugNumericSummaryOp with custom lower_bound and upper_bound.
+class DebugNumericSummaryOpCustomLowerUpperBoundsTest : public OpsTestBase {
+ protected:
+ Status Init(DataType input_type) {
+ TF_CHECK_OK(NodeDefBuilder("op", "DebugNumericSummary")
+ .Input(FakeInput(input_type))
+ .Attr("tensor_name", "FakeTensor:0")
+ .Attr("lower_bound", -0.5f)
+ .Attr("upper_bound", 3.6f)
+ .Finalize(node_def()));
+ return InitOp();
+ }
+};
+
+TEST_F(DebugNumericSummaryOpCustomLowerUpperBoundsTest, Int32Success) {
+ TF_ASSERT_OK(Init(DT_INT32));
+ AddInputFromArray<int32>(TensorShape({2, 3}), {0, 0, -1, 3, 3, 7});
+ TF_ASSERT_OK(RunOpKernel());
+
+ Tensor expected(allocator(), DT_DOUBLE, TensorShape({12}));
+ test::FillValues<double>(
+ &expected,
+ {1.0, // Is initialized.
+ 6.0, // Total element count.
+ 0.0, // nan count.
+ 1.0, // -inf count.
+ 0.0, // negative count (excluding -inf).
+ 2.0, // zero count.
+ 2.0, // positive count (excluding +inf).
+ 1.0, // +inf count.
+ -1.0, // minimum of non-inf and non-nan elements.
+ 7.0, // maximum of non-inf and non-nan elements.
+ 2.0, // mean of non-inf and non-nan elements.
+ 7.33333333333}); // variance of non-inf and non-nan elements.
+
+ test::ExpectTensorNear<double>(expected, *GetOutput(0), 1e-8);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/core/ops/debug_ops.cc b/tensorflow/core/ops/debug_ops.cc
index 66dbd9cc63..63f6b60584 100644
--- a/tensorflow/core/ops/debug_ops.cc
+++ b/tensorflow/core/ops/debug_ops.cc
@@ -101,6 +101,9 @@ REGISTER_OP("DebugNumericSummary")
.Attr("T: type")
.Attr("tensor_name: string = ''")
.Attr("debug_urls: list(string) = []")
+ .Attr("lower_bound: float = -inf")
+ .Attr("upper_bound: float = inf")
+ .Attr("mute_if_healthy: bool = false")
.SetAllowsUninitializedInput()
.Doc(R"doc(
Debug Numeric Summary Op.
@@ -111,12 +114,16 @@ input: Input tensor, non-Reference type, float or double.
output: A double tensor of shape [12], the elements of which are:
[0]: is initialized (1.0) or not (0.0).
[1]: total number of elements
- [2]: -inf count
- [3]: negative element count (excluding -inf)
- [4]: zero element count
- [5]: positive element count (excluding +inf)
- [6]: +inf element count
- [7]: NaN element count
+ [2]: NaN element count
+ [3]: generalized -inf count: elements <= lower_bound. lower_bound is -inf by
+ default.
+ [4]: negative element count (excluding -inf), if lower_bound is the default
+ -inf. Otherwise, this is the count of elements > lower_bound and < 0.
+ [5]: zero element count
+ [6]: positive element count (excluding +inf), if upper_bound is the default
+ +inf. Otherwise, this is the count of elements < upper_bound and > 0.
+ [7]: generalized +inf count, elements >= upper_bound. upper_bound is +inf by
+ default.
Output elements [1:8] are all zero, if the tensor is uninitialized.
[8]: minimum of all non-inf and non-NaN elements.
If uninitialized or no such element exists: +inf.
@@ -129,7 +136,15 @@ Output elements [1:8] are all zero, if the tensor is uninitialized.
tensor_name: Name of the input tensor.
debug_urls: List of URLs to debug targets, e.g.,
- file:///foo/tfdbg_dump, grpc:://localhost:11011
+ file:///foo/tfdbg_dump, grpc://localhost:11011
+lower_bound: (float) The lower bound <= which values will be included in the
+ generalized -inf count. Default: -inf.
+upper_bound: (float) The upper bound >= which values will be included in the
+ generalized +inf count. Default: +inf.
+mute_if_healthy: (bool) Do not send data to the debug URLs unless at least one
+ of elements [2], [3] and [7] (i.e., the nan count and the generalized -inf and
+ inf counts) is non-zero.
+
)doc");
-} // namespace tensorflow \ No newline at end of file
+} // namespace tensorflow
diff --git a/tensorflow/python/debug/lib/debug_utils.py b/tensorflow/python/debug/lib/debug_utils.py
index 7163936631..1a15c0391d 100644
--- a/tensorflow/python/debug/lib/debug_utils.py
+++ b/tensorflow/python/debug/lib/debug_utils.py
@@ -42,6 +42,9 @@ def add_debug_tensor_watch(run_options,
debug_ops: (`str` or `list` of `str`) name(s) of the debug op(s). Can be a
`list` of `str` or a single `str`. The latter case is equivalent to a
`list` of `str` with only one element.
+ For debug op types with customizable attributes, each debug op string can
+ optionally contain a list of attribute name-value pairs, in the syntax of:
+ debug_op_name(attr_name_1=attr_value_1;attr_name_2=attr_value_2;...)
debug_urls: (`str` or `list` of `str`) URL(s) to send debug values to,
e.g., `file:///tmp/tfdbg_dump_1`, `grpc://localhost:12345`.
tolerate_debug_op_creation_failures: (`bool`) Whether to tolerate debug op
@@ -97,6 +100,9 @@ def watch_graph(run_options,
a single string, or None. The case of a single string is equivalent to
a list consisting of a single string, e.g., `file:///tmp/tfdbg_dump_1`,
`grpc://localhost:12345`.
+ For debug op types with customizable attributes, each debug op name string
+ can optionally contain a list of attribute name-value pairs, in the syntax of:
+ debug_op_name(attr_name_1=attr_value_1;attr_name_2=attr_value_2;...)
node_name_regex_whitelist: Regular-expression whitelist for node_name,
e.g., `"(weight_[0-9]+|bias_.*)"`
op_type_regex_whitelist: Regular-expression whitelist for the op type of
@@ -178,6 +184,7 @@ def watch_graph_with_blacklists(run_options,
run_options: An instance of `config_pb2.RunOptions` to be modified.
graph: An instance of `ops.Graph`.
debug_ops: (`str` or `list` of `str`) name(s) of the debug op(s) to use.
+ See the documentation of `watch_graph` for more details.
debug_urls: URL(s) to send debug values to, e.g.,
`file:///tmp/tfdbg_dump_1`, `grpc://localhost:12345`.
node_name_regex_blacklist: Regular-expression blacklist for node_name.
diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py
index fd4b4aecd6..d733a5e210 100644
--- a/tensorflow/python/debug/lib/session_debug_testlib.py
+++ b/tensorflow/python/debug/lib/session_debug_testlib.py
@@ -1146,6 +1146,139 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
self.assertIn("n:0:DebugNumericSummary", dump.debug_watch_keys("n"))
self.assertIn("m:0:DebugNumericSummary", dump.debug_watch_keys("m"))
+ def testDebugNumericSummaryInvalidAttributesStringAreCaught(self):
+ with session.Session() as sess:
+ a = variables.Variable(10.0, name="a")
+ b = variables.Variable(0.0, name="b")
+ c = variables.Variable(0.0, name="c")
+
+ x = math_ops.divide(a, b, name="x")
+ y = math_ops.multiply(x, c, name="y")
+
+ sess.run(variables.global_variables_initializer())
+
+ run_metadata = config_pb2.RunMetadata()
+ run_options = config_pb2.RunOptions(output_partition_graphs=True)
+ debug_utils.watch_graph(
+ run_options,
+ sess.graph,
+ debug_ops=["DebugNumericSummary(foo=1.0)"],
+ debug_urls=self._debug_urls())
+ with self.assertRaisesRegexp(
+ errors.FailedPreconditionError,
+ r"1 attribute key\(s\) were not valid for debug node "
+ r"__dbg_a:0_0_DebugNumericSummary: foo"):
+ sess.run(y, options=run_options, run_metadata=run_metadata)
+
+ run_options = config_pb2.RunOptions(output_partition_graphs=True)
+ debug_utils.watch_graph(
+ run_options,
+ sess.graph,
+ debug_ops=["DebugNumericSummary(foo=1.0; bar=false)"],
+ debug_urls=self._debug_urls())
+ with self.assertRaisesRegexp(
+ errors.FailedPreconditionError,
+ r"2 attribute key\(s\) were not valid for debug node "
+ r"__dbg_a:0_0_DebugNumericSummary:"):
+ sess.run(y, options=run_options, run_metadata=run_metadata)
+
+ run_options = config_pb2.RunOptions(output_partition_graphs=True)
+ debug_utils.watch_graph(
+ run_options,
+ sess.graph,
+ debug_ops=["DebugNumericSummary(foo=1.0; mute_if_healthy=true)"],
+ debug_urls=self._debug_urls())
+ with self.assertRaisesRegexp(
+ errors.FailedPreconditionError,
+ r"1 attribute key\(s\) were not valid for debug node "
+ r"__dbg_a:0_0_DebugNumericSummary: foo"):
+ sess.run(y, options=run_options, run_metadata=run_metadata)
+
+ def testDebugNumericSummaryMuteOnHealthyMutesOnlyHealthyTensorDumps(self):
+ with session.Session() as sess:
+ a = variables.Variable(10.0, name="a")
+ b = variables.Variable(0.0, name="b")
+ c = variables.Variable(0.0, name="c")
+
+ x = math_ops.divide(a, b, name="x")
+ y = math_ops.multiply(x, c, name="y")
+
+ sess.run(variables.global_variables_initializer())
+
+ run_metadata = config_pb2.RunMetadata()
+ run_options = config_pb2.RunOptions(output_partition_graphs=True)
+ debug_utils.watch_graph(
+ run_options,
+ sess.graph,
+ debug_ops=["DebugNumericSummary(mute_if_healthy=true)"],
+ debug_urls=self._debug_urls())
+ sess.run(y, options=run_options, run_metadata=run_metadata)
+
+ dump = debug_data.DebugDumpDir(
+ self._dump_root, partition_graphs=run_metadata.partition_graphs,
+ validate=False)
+ # Here, validate=False is necessary to avoid causality check error.
+ # TODO(cais): Maybe let DebugDumpDir constructor automatically ignore
+ # debug ops with mute_if_healthy=true attribute during validation.
+
+ self.assertEqual(2, dump.size)
+ self.assertAllClose(
+ [[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, np.inf, -np.inf, np.nan,
+ np.nan]],
+ dump.get_tensors("x", 0, "DebugNumericSummary"))
+ self.assertAllClose(
+ [[1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, np.inf, -np.inf, np.nan,
+ np.nan]],
+ dump.get_tensors("y", 0, "DebugNumericSummary"))
+
+ # Another run with the default mute_if_healthy (false) value should
+ # dump all the tensors.
+ shutil.rmtree(self._dump_root)
+ run_metadata = config_pb2.RunMetadata()
+ run_options = config_pb2.RunOptions(output_partition_graphs=True)
+ debug_utils.watch_graph(
+ run_options,
+ sess.graph,
+ debug_ops=["DebugNumericSummary()"],
+ debug_urls=self._debug_urls())
+ sess.run(y, options=run_options, run_metadata=run_metadata)
+
+ dump = debug_data.DebugDumpDir(
+ self._dump_root, partition_graphs=run_metadata.partition_graphs)
+ self.assertEqual(8, dump.size)
+
+ def testDebugNumericSummaryMuteOnHealthyAndCustomBoundsWork(self):
+ with session.Session() as sess:
+ a = variables.Variable([10.0, 10.0], name="a")
+ b = variables.Variable([10.0, 2.0], name="b")
+
+ x = math_ops.add(a, b, name="x") # [20.0, 12.0]
+ y = math_ops.divide(x, b, name="y") # [2.0, 6.0]
+
+ sess.run(variables.global_variables_initializer())
+
+ run_metadata = config_pb2.RunMetadata()
+ run_options = config_pb2.RunOptions(output_partition_graphs=True)
+ debug_utils.watch_graph(
+ run_options,
+ sess.graph,
+ debug_ops=[
+ "DebugNumericSummary(mute_if_healthy=true; upper_bound=11.0)"],
+ debug_urls=self._debug_urls())
+ sess.run(y, options=run_options, run_metadata=run_metadata)
+
+ dump = debug_data.DebugDumpDir(
+ self._dump_root, partition_graphs=run_metadata.partition_graphs,
+ validate=False)
+ # Here, validate=False is necessary to avoid causality check error.
+ # TODO(cais): Maybe let DebugDumpDir constructor automatically ignore
+ # debug ops with mute_if_healthy=true attribute during validation.
+
+ self.assertEqual(1, dump.size)
+ self.assertAllClose(
+ [[1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 20.0, 16.0, 16.0]],
+ dump.get_tensors("x", 0, "DebugNumericSummary"))
+
def testDebugQueueOpsDoesNotoErrorOut(self):
with session.Session() as sess:
q = data_flow_ops.FIFOQueue(3, "float", name="fifo_queue")