diff options
author | Derek Murray <mrry@google.com> | 2018-08-07 10:26:06 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-08-07 10:37:21 -0700 |
commit | b8886649c75ae864f2532bca044e2f44fb138c95 (patch) | |
tree | d053e1c2a91a3125a7ebb4f6d084a4be713febde /tensorflow/core/util/example_proto_fast_parsing.h | |
parent | 90bf05c0d147a7e0c6e48720e17e51233b2bcd3c (diff) |
[tf.data] Add feature statistics collection hooks to the tf.Example parsers.
PiperOrigin-RevId: 207737913
Diffstat (limited to 'tensorflow/core/util/example_proto_fast_parsing.h')
-rw-r--r-- | tensorflow/core/util/example_proto_fast_parsing.h | 24 |
1 file changed, 24 insertions, 0 deletions
```diff
diff --git a/tensorflow/core/util/example_proto_fast_parsing.h b/tensorflow/core/util/example_proto_fast_parsing.h
index 024a4518ee..db5b5ff929 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.h
+++ b/tensorflow/core/util/example_proto_fast_parsing.h
@@ -59,6 +59,26 @@ struct FastParseExampleConfig {
 
   std::vector<Dense> dense;
   std::vector<Sparse> sparse;
+
+  // If `true`, `Result::feature_stats` will contain one
+  // `PerExampleFeatureStats` for each serialized example in the input.
+  bool collect_feature_stats = false;
+};
+
+// Statistics about the features in each example passed to
+// `FastParse[Single]Example()`.
+//
+// TODO(b/111553342): The gathered statistics currently have two limitations:
+// * Feature names that appear more than once will be counted multiple times.
+// * The feature values count only represents the counts for features that were
+//   requested in the `FastParseExampleConfig`.
+// These could be addressed with additional work at runtime.
+struct PerExampleFeatureStats {
+  // The number of feature names in an example.
+  size_t features_count = 0;
+
+  // The sum of the number of values in each feature that is parsed.
+  size_t feature_values_count = 0;
 };
 
 // This is exactly the output of TF's ParseExample Op.
@@ -68,6 +88,10 @@ struct Result {
   std::vector<Tensor> sparse_values;
   std::vector<Tensor> sparse_shapes;
   std::vector<Tensor> dense_values;
+
+  // This vector will be populated with one element per example if
+  // `FastParseExampleConfig::collect_feature_stats` is set to `true`.
+  std::vector<PerExampleFeatureStats> feature_stats;
 };
 
 // Parses a batch of serialized Example protos and converts them into result
```