about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/util/example_proto_fast_parsing.h
diff options
context:
space:
mode:
author: Derek Murray <mrry@google.com> 2018-08-07 10:26:06 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-08-07 10:37:21 -0700
commit: b8886649c75ae864f2532bca044e2f44fb138c95 (patch)
tree: d053e1c2a91a3125a7ebb4f6d084a4be713febde /tensorflow/core/util/example_proto_fast_parsing.h
parent: 90bf05c0d147a7e0c6e48720e17e51233b2bcd3c (diff)
[tf.data] Add feature statistics collection hooks to the tf.Example parsers.
PiperOrigin-RevId: 207737913
Diffstat (limited to 'tensorflow/core/util/example_proto_fast_parsing.h')
-rw-r--r-- tensorflow/core/util/example_proto_fast_parsing.h 24
1 files changed, 24 insertions, 0 deletions
diff --git a/tensorflow/core/util/example_proto_fast_parsing.h b/tensorflow/core/util/example_proto_fast_parsing.h
index 024a4518ee..db5b5ff929 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.h
+++ b/tensorflow/core/util/example_proto_fast_parsing.h
@@ -59,6 +59,26 @@ struct FastParseExampleConfig {
std::vector<Dense> dense;
std::vector<Sparse> sparse;
+
+ // If `true`, `Result::feature_stats` will contain one
+ // `PerExampleFeatureStats` for each serialized example in the input.
+ bool collect_feature_stats = false;
+};
+
+// Statistics about the features in each example passed to
+// `FastParse[Single]Example()`.
+//
+// TODO(b/111553342): The gathered statistics currently have two limitations:
+// * Feature names that appear more than once will be counted multiple times.
+// * The feature values count only represents the counts for features that were
+// requested in the `FastParseExampleConfig`.
+// These could be addressed with additional work at runtime.
+struct PerExampleFeatureStats {
+ // The number of feature names in an example; a name that appears more than once is counted each time.
+ size_t features_count = 0;
+
+ // The sum of the number of values in each parsed feature; only features requested in the `FastParseExampleConfig` contribute.
+ size_t feature_values_count = 0;
};
// This is exactly the output of TF's ParseExample Op.
@@ -68,6 +88,10 @@ struct Result {
std::vector<Tensor> sparse_values;
std::vector<Tensor> sparse_shapes;
std::vector<Tensor> dense_values;
+
+ // This vector will be populated with one element per example if
+ // `FastParseExampleConfig::collect_feature_stats` is set to `true`.
+ std::vector<PerExampleFeatureStats> feature_stats;
};
// Parses a batch of serialized Example protos and converts them into result