3 files changed, 478 insertions, 0 deletions
diff --git a/tensorflow/core/lib/histogram/histogram.cc b/tensorflow/core/lib/histogram/histogram.cc
new file mode 100644
index 0000000000..4c29d687b7
--- /dev/null
+++ b/tensorflow/core/lib/histogram/histogram.cc
@@ -0,0 +1,247 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include <float.h>
+#include <math.h>
+#include "tensorflow/core/framework/summary.pb.h"
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/port.h"
+namespace tensorflow {
+namespace histogram {
+
+static std::vector<double>* InitDefaultBucketsInner() {
+  std::vector<double> buckets;
+  std::vector<double> neg_buckets;
+  // Make buckets whose range grows by 10% starting at 1.0e-12 up to 1.0e20
+  double v = 1.0e-12;
+  while (v < 1.0e20) {
+    buckets.push_back(v);
+    neg_buckets.push_back(-v);
+    v *= 1.1;
+  }
+  buckets.push_back(DBL_MAX);
+  neg_buckets.push_back(-DBL_MAX);
+  std::reverse(neg_buckets.begin(), neg_buckets.end());
+  std::vector<double>* result = new std::vector<double>;
+  result->insert(result->end(), neg_buckets.begin(), neg_buckets.end());
+  result->push_back(0.0);
+  result->insert(result->end(), buckets.begin(), buckets.end());
+  return result;
+}
+
+static gtl::ArraySlice<double> InitDefaultBuckets() {
+  static std::vector<double>* default_bucket_limits = InitDefaultBucketsInner();
+  return *default_bucket_limits;
+}
+
+Histogram::Histogram() : bucket_limits_(InitDefaultBuckets()) { Clear(); }
+
+// Create a histogram with a custom set of bucket limits,
+// specified in "custom_buckets[0..custom_buckets.size()-1]"
+Histogram::Histogram(gtl::ArraySlice<double> custom_bucket_limits)
+    : custom_bucket_limits_(custom_bucket_limits.begin(),
+                            custom_bucket_limits.end()),
+      bucket_limits_(custom_bucket_limits_) {
+#ifndef NDEBUG
+  DCHECK_GT(bucket_limits_.size(), 0);
+  // Verify that the bucket boundaries are strictly increasing
+  for (size_t i = 1; i < bucket_limits_.size(); i++) {
+    DCHECK_GT(bucket_limits_[i], bucket_limits_[i - 1]);
+  }
+#endif
+  Clear();
+}
+
+bool Histogram::DecodeFromProto(const HistogramProto& proto) {
+  if ((proto.bucket_size() != proto.bucket_limit_size()) ||
+      (proto.bucket_size() == 0)) {
+    return false;
+  }
+  min_ = proto.min();
+  max_ = proto.max();
+  num_ = proto.num();
+  sum_ = proto.sum();
+  sum_squares_ = proto.sum_squares();
+  custom_bucket_limits_.clear();
+  custom_bucket_limits_.insert(custom_bucket_limits_.end(),
+                               proto.bucket_limit().begin(),
+                               proto.bucket_limit().end());
+  bucket_limits_ = custom_bucket_limits_;
+  buckets_.clear();
+  buckets_.insert(buckets_.end(), proto.bucket().begin(), proto.bucket().end());
+  return true;
+}
+
+void Histogram::Clear() {
+  min_ = bucket_limits_[bucket_limits_.size() - 1];
+  max_ = -DBL_MAX;
+  num_ = 0;
+  sum_ = 0;
+  sum_squares_ = 0;
+  buckets_.resize(bucket_limits_.size());
+  for (size_t i = 0; i < bucket_limits_.size(); i++) {
+    buckets_[i] = 0;
+  }
+}
+
+void Histogram::Add(double value) {
+  int b =
+      std::upper_bound(bucket_limits_.begin(), bucket_limits_.end(), value) -
+      bucket_limits_.begin();
+
+  buckets_[b] += 1.0;
+  if (min_ > value) min_ = value;
+  if (max_ < value) max_ = value;
+  num_++;
+  sum_ += value;
+  sum_squares_ += (value * value);
+}
+
+double Histogram::Median() const { return Percentile(50.0); }
+
+double Histogram::Percentile(double p) const {
+  if (num_ == 0.0) return 0.0;
+  double threshold = num_ * (p / 100.0);
+  double sum = 0;
+  for (size_t b = 0; b < buckets_.size(); b++) {
+    sum += buckets_[b];
+    if (sum >= threshold) {
+      // Scale linearly within this bucket
+      double left_point = (b == 0) ? min_ : bucket_limits_[b - 1];
+      double right_point = bucket_limits_[b];
+      double left_sum = sum - buckets_[b];
+      double right_sum = sum;
+      double pos = (threshold - left_sum) / (right_sum - left_sum);
+      double r = left_point + (right_point - left_point) * pos;
+      if (r < min_) r = min_;
+      if (r > max_) r = max_;
+      return r;
+    }
+  }
+  return max_;
+}
+
+double Histogram::Average() const {
+  if (num_ == 0.0) return 0;
+  return sum_ / num_;
+}
+
+double Histogram::StandardDeviation() const {
+  if (num_ == 0.0) return 0;
+  double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_);
+  return sqrt(variance);
+}
+
+std::string Histogram::ToString() const {
+  std::string r;
+  char buf[200];
+  snprintf(buf, sizeof(buf), "Count: %.0f  Average: %.4f  StdDev: %.2f\n", num_,
+           Average(), StandardDeviation());
+  r.append(buf);
+  snprintf(buf, sizeof(buf), "Min: %.4f  Median: %.4f  Max: %.4f\n",
+           (num_ == 0.0 ? 0.0 : min_), Median(), max_);
+  r.append(buf);
+  r.append("------------------------------------------------------\n");
+  const double mult = num_ > 0 ? 100.0 / num_ : 0.0;
+  double sum = 0;
+  for (size_t b = 0; b < buckets_.size(); b++) {
+    if (buckets_[b] <= 0.0) continue;
+    sum += buckets_[b];
+    snprintf(buf, sizeof(buf), "[ %10.2g, %10.2g ) %7.0f %7.3f%% %7.3f%% ",
+             ((b == 0) ? -DBL_MAX : bucket_limits_[b - 1]),  // left
+             bucket_limits_[b],                              // right
+             buckets_[b],                                    // count
+             mult * buckets_[b],                             // percentage
+             mult * sum);                                    // cum percentage
+    r.append(buf);
+
+    // Add hash marks based on percentage; 20 marks for 100%.
+    int marks = static_cast<int>(20 * (buckets_[b] / num_) + 0.5);
+    r.append(marks, '#');
+    r.push_back('\n');
+  }
+  return r;
+}
+
+void Histogram::EncodeToProto(HistogramProto* proto,
+                              bool preserve_zero_buckets) const {
+  proto->Clear();
+  proto->set_min(min_);
+  proto->set_max(max_);
+  proto->set_num(num_);
+  proto->set_sum(sum_);
+  proto->set_sum_squares(sum_squares_);
+  for (size_t i = 0; i < buckets_.size();) {
+    double end = bucket_limits_[i];
+    double count = buckets_[i];
+    i++;
+    if (!preserve_zero_buckets && count <= 0.0) {
+      // Find run of empty buckets and collapse them into one
+      while (i < buckets_.size() && buckets_[i] <= 0.0) {
+        end = bucket_limits_[i];
+        count = buckets_[i];
+        i++;
+      }
+    }
+    proto->add_bucket_limit(end);
+    proto->add_bucket(count);
+  }
+  if (proto->bucket_size() == 0.0) {
+    // It's easier when we restore if we always have at least one bucket entry
+    proto->add_bucket_limit(DBL_MAX);
+    proto->add_bucket(0.0);
+  }
+}
+
+// ThreadSafeHistogram implementation.
+bool ThreadSafeHistogram::DecodeFromProto(const HistogramProto& proto) {
+  mutex_lock l(mu_);
+  return histogram_.DecodeFromProto(proto);
+}
+
+void ThreadSafeHistogram::Clear() {
+  mutex_lock l(mu_);
+  histogram_.Clear();
+}
+
+void ThreadSafeHistogram::Add(double value) {
+  mutex_lock l(mu_);
+  histogram_.Add(value);
+}
+
+void ThreadSafeHistogram::EncodeToProto(HistogramProto* proto,
+                                        bool preserve_zero_buckets) const {
+  mutex_lock l(mu_);
+  histogram_.EncodeToProto(proto, preserve_zero_buckets);
+}
+
+double ThreadSafeHistogram::Median() const {
+  mutex_lock l(mu_);
+  return histogram_.Median();
+}
+
+double ThreadSafeHistogram::Percentile(double p) const {
+  mutex_lock l(mu_);
+  return histogram_.Percentile(p);
+}
+
+double ThreadSafeHistogram::Average() const {
+  mutex_lock l(mu_);
+  return histogram_.Average();
+}
+
+double ThreadSafeHistogram::StandardDeviation() const {
+  mutex_lock l(mu_);
+  return histogram_.StandardDeviation();
+}
+
+std::string ThreadSafeHistogram::ToString() const {
+  mutex_lock l(mu_);
+  return histogram_.ToString();
+}
+
+}  // namespace histogram
+}  // namespace tensorflow
diff --git a/tensorflow/core/lib/histogram/histogram.h b/tensorflow/core/lib/histogram/histogram.h
new file mode 100644
index 0000000000..9b655f3acb
--- /dev/null
+++ b/tensorflow/core/lib/histogram/histogram.h
@@ -0,0 +1,119 @@
+#ifndef TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_
+#define TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_
+
+#include <string>
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+
+namespace tensorflow {
+
+class HistogramProto;
+
+namespace histogram {
+
+class Histogram {
+ public:
+  // Create a histogram with a default set of bucket boundaries.
+  // Buckets near zero cover very small ranges (e.g. 10^-12), and each
+  // bucket range grows by ~10% as we head away from zero.  The
+  // buckets cover the range from -DBL_MAX to DBL_MAX.
+  Histogram();
+
+  // Create a histogram with a custom set of bucket boundaries,
+  // specified in "custom_bucket_limits[0..custom_bucket_limits.size()-1]"
+  // REQUIRES: custom_bucket_limits[i] values are monotonically increasing.
+  // REQUIRES: custom_bucket_limits is not empty()
+  explicit Histogram(gtl::ArraySlice<double> custom_bucket_limits);
+
+  // Restore the state of a histogram that was previously encoded
+  // via Histogram::EncodeToProto.  Note that only the bucket boundaries
+  // generated by EncodeToProto will be restored.
+  bool DecodeFromProto(const HistogramProto& proto);
+
+  ~Histogram() {}
+
+  void Clear();
+  void Add(double value);
+
+  // Save the current state of the histogram to "*proto".  If
+  // "preserve_zero_buckets" is false, only non-zero bucket values and
+  // ranges are saved, and the bucket boundaries of zero-valued buckets
+  // are lost.
+  void EncodeToProto(HistogramProto* proto, bool preserve_zero_buckets) const;
+
+  // Return the median of the values in the histogram
+  double Median() const;
+
+  // Return the "p"th percentile [0.0..100.0] of the values in the
+  // distribution
+  double Percentile(double p) const;
+
+  // Return the average value of the distribution
+  double Average() const;
+
+  // Return the standard deviation of values in the distribution
+  double StandardDeviation() const;
+
+  // Returns a multi-line human-readable string representing the histogram
+  // contents.  Example output:
+  //   Count: 4  Average: 251.7475  StdDev: 432.02
+  //   Min: -3.0000  Median: 5.0000  Max: 1000.0000
+  //   ------------------------------------------------------
+  //   [      -5,       0 )       1  25.000%  25.000% #####
+  //   [       0,       5 )       1  25.000%  50.000% #####
+  //   [       5,      10 )       1  25.000%  75.000% #####
+  //   [    1000,   10000 )       1  25.000% 100.000% #####
+  std::string ToString() const;
+
+ private:
+  double min_;
+  double max_;
+  double num_;
+  double sum_;
+  double sum_squares_;
+
+  std::vector<double> custom_bucket_limits_;
+  gtl::ArraySlice<double> bucket_limits_;
+  std::vector<double> buckets_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(Histogram);
+};
+
+// Wrapper around a Histogram object that is thread safe.
+//
+// All methods hold a lock while delegating to a Histogram object owned by the
+// ThreadSafeHistogram instance.
+//
+// See Histogram for documentation of the methods.
+class ThreadSafeHistogram {
+ public:
+  ThreadSafeHistogram() {}
+  explicit ThreadSafeHistogram(gtl::ArraySlice<double> custom_bucket_limits)
+      : histogram_(custom_bucket_limits) {}
+  bool DecodeFromProto(const HistogramProto& proto);
+
+  ~ThreadSafeHistogram() {}
+
+  void Clear();
+
+  // TODO(mdevin): It might be a good idea to provide a AddN(<many values>)
+  // method to avoid grabbing/releasing the lock when adding many values.
+  void Add(double value);
+
+  void EncodeToProto(HistogramProto* proto, bool preserve_zero_buckets) const;
+  double Median() const;
+  double Percentile(double p) const;
+  double Average() const;
+  double StandardDeviation() const;
+  std::string ToString() const;
+
+ private:
+  mutable mutex mu_;
+  Histogram histogram_ GUARDED_BY(mu_);
+};
+
+}  // namespace histogram
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_
diff --git a/tensorflow/core/lib/histogram/histogram_test.cc b/tensorflow/core/lib/histogram/histogram_test.cc
new file mode 100644
index 0000000000..ede44fe85b
--- /dev/null
+++ b/tensorflow/core/lib/histogram/histogram_test.cc
@@ -0,0 +1,112 @@
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include <float.h>
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/framework/summary.pb.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace histogram {
+
+static void Validate(const Histogram& h) {
+  string s1 = h.ToString();
+  LOG(ERROR) << s1;
+
+  HistogramProto proto_with_zeroes;
+  h.EncodeToProto(&proto_with_zeroes, true);
+  Histogram h2;
+  EXPECT_TRUE(h2.DecodeFromProto(proto_with_zeroes));
+  string s2 = h2.ToString();
+  LOG(ERROR) << s2;
+
+  EXPECT_EQ(s1, s2);
+
+  HistogramProto proto_no_zeroes;
+  h.EncodeToProto(&proto_no_zeroes, false);
+  LOG(ERROR) << proto_no_zeroes.DebugString();
+  Histogram h3;
+  EXPECT_TRUE(h3.DecodeFromProto(proto_no_zeroes));
+  string s3 = h3.ToString();
+  LOG(ERROR) << s3;
+
+  EXPECT_EQ(s1, s3);
+}
+
+TEST(Histogram, Empty) {
+  Histogram h;
+  Validate(h);
+}
+
+TEST(Histogram, SingleValue) {
+  Histogram h;
+  h.Add(-3.0);
+  Validate(h);
+}
+
+TEST(Histogram, CustomBuckets) {
+  Histogram h({-10, -5, 0, 5, 10, 100, 1000, 10000, DBL_MAX});
+  h.Add(-3.0);
+  h.Add(4.99);
+  h.Add(5.0);
+  h.Add(1000.0);
+  Validate(h);
+}
+
+TEST(Histogram, Percentile) {
+  Histogram h({0, 10, 100, DBL_MAX});
+  h.Add(-2);
+  h.Add(-2);
+  h.Add(0);
+  double median = h.Percentile(50.0);
+  EXPECT_EQ(median, -0.5);
+}
+
+TEST(Histogram, Basic) {
+  Histogram h;
+  for (int i = 0; i < 100; i++) {
+    h.Add(i);
+  }
+  for (int i = 1000; i < 100000; i += 1000) {
+    h.Add(i);
+  }
+  Validate(h);
+}
+
+TEST(ThreadSafeHistogram, Basic) {
+  // Fill a normal histogram.
+  Histogram h;
+  for (int i = 0; i < 100; i++) {
+    h.Add(i);
+  }
+
+  // Fill a thread-safe histogram with the same values.
+  ThreadSafeHistogram tsh;
+  for (int i = 0; i < 100; i++) {
+    tsh.Add(i);
+  }
+
+  for (int i = 0; i < 2; ++i) {
+    bool preserve_zero_buckets = (i == 0);
+    HistogramProto h_proto;
+    h.EncodeToProto(&h_proto, preserve_zero_buckets);
+    HistogramProto tsh_proto;
+    tsh.EncodeToProto(&tsh_proto, preserve_zero_buckets);
+
+    // Let's decode from the proto of the other histogram type.
+    Histogram h2;
+    EXPECT_TRUE(h2.DecodeFromProto(tsh_proto));
+    ThreadSafeHistogram tsh2;
+    EXPECT_TRUE(tsh2.DecodeFromProto(h_proto));
+
+    // Now let's reencode and check they match.
+    EXPECT_EQ(h2.ToString(), tsh2.ToString());
+  }
+
+  EXPECT_EQ(h.Median(), tsh.Median());
+  EXPECT_EQ(h.Percentile(40.0), tsh.Percentile(40.0));
+  EXPECT_EQ(h.Average(), tsh.Average());
+  EXPECT_EQ(h.StandardDeviation(), tsh.StandardDeviation());
+  EXPECT_EQ(h.ToString(), tsh.ToString());
+}
+
+}  // namespace histogram
+}  // namespace tensorflow