aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/lib/histogram
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/lib/histogram')
-rw-r--r--tensorflow/core/lib/histogram/histogram.cc247
-rw-r--r--tensorflow/core/lib/histogram/histogram.h119
-rw-r--r--tensorflow/core/lib/histogram/histogram_test.cc112
3 files changed, 478 insertions, 0 deletions
diff --git a/tensorflow/core/lib/histogram/histogram.cc b/tensorflow/core/lib/histogram/histogram.cc
new file mode 100644
index 0000000000..4c29d687b7
--- /dev/null
+++ b/tensorflow/core/lib/histogram/histogram.cc
@@ -0,0 +1,247 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include <float.h>
+#include <math.h>
+#include "tensorflow/core/framework/summary.pb.h"
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/port.h"
+namespace tensorflow {
+namespace histogram {
+
+static std::vector<double>* InitDefaultBucketsInner() {
+ std::vector<double> buckets;
+ std::vector<double> neg_buckets;
+ // Make buckets whose range grows by 10% starting at 1.0e-12 up to 1.0e20
+ double v = 1.0e-12;
+ while (v < 1.0e20) {
+ buckets.push_back(v);
+ neg_buckets.push_back(-v);
+ v *= 1.1;
+ }
+ buckets.push_back(DBL_MAX);
+ neg_buckets.push_back(-DBL_MAX);
+ std::reverse(neg_buckets.begin(), neg_buckets.end());
+ std::vector<double>* result = new std::vector<double>;
+ result->insert(result->end(), neg_buckets.begin(), neg_buckets.end());
+ result->push_back(0.0);
+ result->insert(result->end(), buckets.begin(), buckets.end());
+ return result;
+}
+
+static gtl::ArraySlice<double> InitDefaultBuckets() {
+ static std::vector<double>* default_bucket_limits = InitDefaultBucketsInner();
+ return *default_bucket_limits;
+}
+
+Histogram::Histogram() : bucket_limits_(InitDefaultBuckets()) { Clear(); }
+
+// Create a histogram with a custom set of bucket limits,
+// specified in "custom_buckets[0..custom_buckets.size()-1]"
+Histogram::Histogram(gtl::ArraySlice<double> custom_bucket_limits)
+ : custom_bucket_limits_(custom_bucket_limits.begin(),
+ custom_bucket_limits.end()),
+ bucket_limits_(custom_bucket_limits_) {
+#ifndef NDEBUG
+ DCHECK_GT(bucket_limits_.size(), 0);
+ // Verify that the bucket boundaries are strictly increasing
+ for (size_t i = 1; i < bucket_limits_.size(); i++) {
+ DCHECK_GT(bucket_limits_[i], bucket_limits_[i - 1]);
+ }
+#endif
+ Clear();
+}
+
+bool Histogram::DecodeFromProto(const HistogramProto& proto) {
+ if ((proto.bucket_size() != proto.bucket_limit_size()) ||
+ (proto.bucket_size() == 0)) {
+ return false;
+ }
+ min_ = proto.min();
+ max_ = proto.max();
+ num_ = proto.num();
+ sum_ = proto.sum();
+ sum_squares_ = proto.sum_squares();
+ custom_bucket_limits_.clear();
+ custom_bucket_limits_.insert(custom_bucket_limits_.end(),
+ proto.bucket_limit().begin(),
+ proto.bucket_limit().end());
+ bucket_limits_ = custom_bucket_limits_;
+ buckets_.clear();
+ buckets_.insert(buckets_.end(), proto.bucket().begin(), proto.bucket().end());
+ return true;
+}
+
+void Histogram::Clear() {
+ min_ = bucket_limits_[bucket_limits_.size() - 1];
+ max_ = -DBL_MAX;
+ num_ = 0;
+ sum_ = 0;
+ sum_squares_ = 0;
+ buckets_.resize(bucket_limits_.size());
+ for (size_t i = 0; i < bucket_limits_.size(); i++) {
+ buckets_[i] = 0;
+ }
+}
+
+void Histogram::Add(double value) {
+ int b =
+ std::upper_bound(bucket_limits_.begin(), bucket_limits_.end(), value) -
+ bucket_limits_.begin();
+
+ buckets_[b] += 1.0;
+ if (min_ > value) min_ = value;
+ if (max_ < value) max_ = value;
+ num_++;
+ sum_ += value;
+ sum_squares_ += (value * value);
+}
+
+double Histogram::Median() const { return Percentile(50.0); }
+
+double Histogram::Percentile(double p) const {
+ if (num_ == 0.0) return 0.0;
+ double threshold = num_ * (p / 100.0);
+ double sum = 0;
+ for (size_t b = 0; b < buckets_.size(); b++) {
+ sum += buckets_[b];
+ if (sum >= threshold) {
+ // Scale linearly within this bucket
+ double left_point = (b == 0) ? min_ : bucket_limits_[b - 1];
+ double right_point = bucket_limits_[b];
+ double left_sum = sum - buckets_[b];
+ double right_sum = sum;
+ double pos = (threshold - left_sum) / (right_sum - left_sum);
+ double r = left_point + (right_point - left_point) * pos;
+ if (r < min_) r = min_;
+ if (r > max_) r = max_;
+ return r;
+ }
+ }
+ return max_;
+}
+
+double Histogram::Average() const {
+ if (num_ == 0.0) return 0;
+ return sum_ / num_;
+}
+
+double Histogram::StandardDeviation() const {
+ if (num_ == 0.0) return 0;
+ double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_);
+ return sqrt(variance);
+}
+
+std::string Histogram::ToString() const {
+ std::string r;
+ char buf[200];
+ snprintf(buf, sizeof(buf), "Count: %.0f Average: %.4f StdDev: %.2f\n", num_,
+ Average(), StandardDeviation());
+ r.append(buf);
+ snprintf(buf, sizeof(buf), "Min: %.4f Median: %.4f Max: %.4f\n",
+ (num_ == 0.0 ? 0.0 : min_), Median(), max_);
+ r.append(buf);
+ r.append("------------------------------------------------------\n");
+ const double mult = num_ > 0 ? 100.0 / num_ : 0.0;
+ double sum = 0;
+ for (size_t b = 0; b < buckets_.size(); b++) {
+ if (buckets_[b] <= 0.0) continue;
+ sum += buckets_[b];
+ snprintf(buf, sizeof(buf), "[ %10.2g, %10.2g ) %7.0f %7.3f%% %7.3f%% ",
+ ((b == 0) ? -DBL_MAX : bucket_limits_[b - 1]), // left
+ bucket_limits_[b], // right
+ buckets_[b], // count
+ mult * buckets_[b], // percentage
+ mult * sum); // cum percentage
+ r.append(buf);
+
+ // Add hash marks based on percentage; 20 marks for 100%.
+ int marks = static_cast<int>(20 * (buckets_[b] / num_) + 0.5);
+ r.append(marks, '#');
+ r.push_back('\n');
+ }
+ return r;
+}
+
+void Histogram::EncodeToProto(HistogramProto* proto,
+ bool preserve_zero_buckets) const {
+ proto->Clear();
+ proto->set_min(min_);
+ proto->set_max(max_);
+ proto->set_num(num_);
+ proto->set_sum(sum_);
+ proto->set_sum_squares(sum_squares_);
+ for (size_t i = 0; i < buckets_.size();) {
+ double end = bucket_limits_[i];
+ double count = buckets_[i];
+ i++;
+ if (!preserve_zero_buckets && count <= 0.0) {
+ // Find run of empty buckets and collapse them into one
+ while (i < buckets_.size() && buckets_[i] <= 0.0) {
+ end = bucket_limits_[i];
+ count = buckets_[i];
+ i++;
+ }
+ }
+ proto->add_bucket_limit(end);
+ proto->add_bucket(count);
+ }
+ if (proto->bucket_size() == 0.0) {
+ // It's easier when we restore if we always have at least one bucket entry
+ proto->add_bucket_limit(DBL_MAX);
+ proto->add_bucket(0.0);
+ }
+}
+
+// ThreadSafeHistogram implementation.
+bool ThreadSafeHistogram::DecodeFromProto(const HistogramProto& proto) {
+ mutex_lock l(mu_);
+ return histogram_.DecodeFromProto(proto);
+}
+
+void ThreadSafeHistogram::Clear() {
+ mutex_lock l(mu_);
+ histogram_.Clear();
+}
+
+void ThreadSafeHistogram::Add(double value) {
+ mutex_lock l(mu_);
+ histogram_.Add(value);
+}
+
+void ThreadSafeHistogram::EncodeToProto(HistogramProto* proto,
+ bool preserve_zero_buckets) const {
+ mutex_lock l(mu_);
+ histogram_.EncodeToProto(proto, preserve_zero_buckets);
+}
+
+double ThreadSafeHistogram::Median() const {
+ mutex_lock l(mu_);
+ return histogram_.Median();
+}
+
+double ThreadSafeHistogram::Percentile(double p) const {
+ mutex_lock l(mu_);
+ return histogram_.Percentile(p);
+}
+
+double ThreadSafeHistogram::Average() const {
+ mutex_lock l(mu_);
+ return histogram_.Average();
+}
+
+double ThreadSafeHistogram::StandardDeviation() const {
+ mutex_lock l(mu_);
+ return histogram_.StandardDeviation();
+}
+
+std::string ThreadSafeHistogram::ToString() const {
+ mutex_lock l(mu_);
+ return histogram_.ToString();
+}
+
+} // namespace histogram
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/histogram/histogram.h b/tensorflow/core/lib/histogram/histogram.h
new file mode 100644
index 0000000000..9b655f3acb
--- /dev/null
+++ b/tensorflow/core/lib/histogram/histogram.h
@@ -0,0 +1,119 @@
+#ifndef TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_
+#define TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_
+
+#include <string>
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+
+namespace tensorflow {
+
+class HistogramProto;
+
+namespace histogram {
+
+class Histogram {
+ public:
+ // Create a histogram with a default set of bucket boundaries.
+ // Buckets near zero cover very small ranges (e.g. 10^-12), and each
+ // bucket range grows by ~10% as we head away from zero. The
+ // buckets cover the range from -DBL_MAX to DBL_MAX.
+ Histogram();
+
+ // Create a histogram with a custom set of bucket boundaries,
+ // specified in "custom_bucket_limits[0..custom_bucket_limits.size()-1]"
+ // REQUIRES: custom_bucket_limits[i] values are monotonically increasing.
+ // REQUIRES: custom_bucket_limits is not empty()
+ explicit Histogram(gtl::ArraySlice<double> custom_bucket_limits);
+
+ // Restore the state of a histogram that was previously encoded
+ // via Histogram::EncodeToProto. Note that only the bucket boundaries
+ // generated by EncodeToProto will be restored.
+ bool DecodeFromProto(const HistogramProto& proto);
+
+ ~Histogram() {}
+
+ void Clear();
+ void Add(double value);
+
+ // Save the current state of the histogram to "*proto". If
+ // "preserve_zero_buckets" is false, only non-zero bucket values and
+ // ranges are saved, and the bucket boundaries of zero-valued buckets
+ // are lost.
+ void EncodeToProto(HistogramProto* proto, bool preserve_zero_buckets) const;
+
+ // Return the median of the values in the histogram
+ double Median() const;
+
+ // Return the "p"th percentile [0.0..100.0] of the values in the
+ // distribution
+ double Percentile(double p) const;
+
+ // Return the average value of the distribution
+ double Average() const;
+
+ // Return the standard deviation of values in the distribution
+ double StandardDeviation() const;
+
+ // Returns a multi-line human-readable string representing the histogram
+ // contents. Example output:
+ // Count: 4 Average: 251.7475 StdDev: 432.02
+ // Min: -3.0000 Median: 5.0000 Max: 1000.0000
+ // ------------------------------------------------------
+ // [ -5, 0 ) 1 25.000% 25.000% #####
+ // [ 0, 5 ) 1 25.000% 50.000% #####
+ // [ 5, 10 ) 1 25.000% 75.000% #####
+ // [ 1000, 10000 ) 1 25.000% 100.000% #####
+ std::string ToString() const;
+
+ private:
+ double min_;
+ double max_;
+ double num_;
+ double sum_;
+ double sum_squares_;
+
+ std::vector<double> custom_bucket_limits_;
+ gtl::ArraySlice<double> bucket_limits_;
+ std::vector<double> buckets_;
+
+ TF_DISALLOW_COPY_AND_ASSIGN(Histogram);
+};
+
+// Wrapper around a Histogram object that is thread safe.
+//
+// All methods hold a lock while delegating to a Histogram object owned by the
+// ThreadSafeHistogram instance.
+//
+// See Histogram for documentation of the methods.
+class ThreadSafeHistogram {
+ public:
+ ThreadSafeHistogram() {}
+ explicit ThreadSafeHistogram(gtl::ArraySlice<double> custom_bucket_limits)
+ : histogram_(custom_bucket_limits) {}
+ bool DecodeFromProto(const HistogramProto& proto);
+
+ ~ThreadSafeHistogram() {}
+
+ void Clear();
+
+ // TODO(mdevin): It might be a good idea to provide a AddN(<many values>)
+ // method to avoid grabbing/releasing the lock when adding many values.
+ void Add(double value);
+
+ void EncodeToProto(HistogramProto* proto, bool preserve_zero_buckets) const;
+ double Median() const;
+ double Percentile(double p) const;
+ double Average() const;
+ double StandardDeviation() const;
+ std::string ToString() const;
+
+ private:
+ mutable mutex mu_;
+ Histogram histogram_ GUARDED_BY(mu_);
+};
+
+} // namespace histogram
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_
diff --git a/tensorflow/core/lib/histogram/histogram_test.cc b/tensorflow/core/lib/histogram/histogram_test.cc
new file mode 100644
index 0000000000..ede44fe85b
--- /dev/null
+++ b/tensorflow/core/lib/histogram/histogram_test.cc
@@ -0,0 +1,112 @@
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include <float.h>
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/framework/summary.pb.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace histogram {
+
+static void Validate(const Histogram& h) {
+ string s1 = h.ToString();
+ LOG(ERROR) << s1;
+
+ HistogramProto proto_with_zeroes;
+ h.EncodeToProto(&proto_with_zeroes, true);
+ Histogram h2;
+ EXPECT_TRUE(h2.DecodeFromProto(proto_with_zeroes));
+ string s2 = h2.ToString();
+ LOG(ERROR) << s2;
+
+ EXPECT_EQ(s1, s2);
+
+ HistogramProto proto_no_zeroes;
+ h.EncodeToProto(&proto_no_zeroes, false);
+ LOG(ERROR) << proto_no_zeroes.DebugString();
+ Histogram h3;
+ EXPECT_TRUE(h3.DecodeFromProto(proto_no_zeroes));
+ string s3 = h3.ToString();
+ LOG(ERROR) << s3;
+
+ EXPECT_EQ(s1, s3);
+}
+
+TEST(Histogram, Empty) {
+ Histogram h;
+ Validate(h);
+}
+
+TEST(Histogram, SingleValue) {
+ Histogram h;
+ h.Add(-3.0);
+ Validate(h);
+}
+
+TEST(Histogram, CustomBuckets) {
+ Histogram h({-10, -5, 0, 5, 10, 100, 1000, 10000, DBL_MAX});
+ h.Add(-3.0);
+ h.Add(4.99);
+ h.Add(5.0);
+ h.Add(1000.0);
+ Validate(h);
+}
+
+TEST(Histogram, Percentile) {
+ Histogram h({0, 10, 100, DBL_MAX});
+ h.Add(-2);
+ h.Add(-2);
+ h.Add(0);
+ double median = h.Percentile(50.0);
+ EXPECT_EQ(median, -0.5);
+}
+
+TEST(Histogram, Basic) {
+ Histogram h;
+ for (int i = 0; i < 100; i++) {
+ h.Add(i);
+ }
+ for (int i = 1000; i < 100000; i += 1000) {
+ h.Add(i);
+ }
+ Validate(h);
+}
+
+TEST(ThreadSafeHistogram, Basic) {
+ // Fill a normal histogram.
+ Histogram h;
+ for (int i = 0; i < 100; i++) {
+ h.Add(i);
+ }
+
+ // Fill a thread-safe histogram with the same values.
+ ThreadSafeHistogram tsh;
+ for (int i = 0; i < 100; i++) {
+ tsh.Add(i);
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ bool preserve_zero_buckets = (i == 0);
+ HistogramProto h_proto;
+ h.EncodeToProto(&h_proto, preserve_zero_buckets);
+ HistogramProto tsh_proto;
+ tsh.EncodeToProto(&tsh_proto, preserve_zero_buckets);
+
+ // Let's decode from the proto of the other histogram type.
+ Histogram h2;
+ EXPECT_TRUE(h2.DecodeFromProto(tsh_proto));
+ ThreadSafeHistogram tsh2;
+ EXPECT_TRUE(tsh2.DecodeFromProto(h_proto));
+
+ // Now let's reencode and check they match.
+ EXPECT_EQ(h2.ToString(), tsh2.ToString());
+ }
+
+ EXPECT_EQ(h.Median(), tsh.Median());
+ EXPECT_EQ(h.Percentile(40.0), tsh.Percentile(40.0));
+ EXPECT_EQ(h.Average(), tsh.Average());
+ EXPECT_EQ(h.StandardDeviation(), tsh.StandardDeviation());
+ EXPECT_EQ(h.ToString(), tsh.ToString());
+}
+
+} // namespace histogram
+} // namespace tensorflow