diff options
author | Manjunath Kudlur <keveman@gmail.com> | 2015-11-06 16:27:58 -0800 |
---|---|---|
committer | Manjunath Kudlur <keveman@gmail.com> | 2015-11-06 16:27:58 -0800 |
commit | f41959ccb2d9d4c722fe8fc3351401d53bcf4900 (patch) | |
tree | ef0ca22cb2a5ac4bdec9d080d8e0788a53ed496d /tensorflow/core/lib/histogram |
TensorFlow: Initial commit of TensorFlow library.
TensorFlow is an open source software library for numerical computation
using data flow graphs.
Base CL: 107276108
Diffstat (limited to 'tensorflow/core/lib/histogram')
-rw-r--r-- | tensorflow/core/lib/histogram/histogram.cc | 247 | ||||
-rw-r--r-- | tensorflow/core/lib/histogram/histogram.h | 119 | ||||
-rw-r--r-- | tensorflow/core/lib/histogram/histogram_test.cc | 112 |
3 files changed, 478 insertions, 0 deletions
diff --git a/tensorflow/core/lib/histogram/histogram.cc b/tensorflow/core/lib/histogram/histogram.cc new file mode 100644 index 0000000000..4c29d687b7 --- /dev/null +++ b/tensorflow/core/lib/histogram/histogram.cc @@ -0,0 +1,247 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "tensorflow/core/lib/histogram/histogram.h" +#include <float.h> +#include <math.h> +#include "tensorflow/core/framework/summary.pb.h" + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/port.h" +namespace tensorflow { +namespace histogram { + +static std::vector<double>* InitDefaultBucketsInner() { + std::vector<double> buckets; + std::vector<double> neg_buckets; + // Make buckets whose range grows by 10% starting at 1.0e-12 up to 1.0e20 + double v = 1.0e-12; + while (v < 1.0e20) { + buckets.push_back(v); + neg_buckets.push_back(-v); + v *= 1.1; + } + buckets.push_back(DBL_MAX); + neg_buckets.push_back(-DBL_MAX); + std::reverse(neg_buckets.begin(), neg_buckets.end()); + std::vector<double>* result = new std::vector<double>; + result->insert(result->end(), neg_buckets.begin(), neg_buckets.end()); + result->push_back(0.0); + result->insert(result->end(), buckets.begin(), buckets.end()); + return result; +} + +static gtl::ArraySlice<double> InitDefaultBuckets() { + static std::vector<double>* default_bucket_limits = InitDefaultBucketsInner(); + return *default_bucket_limits; +} + +Histogram::Histogram() : bucket_limits_(InitDefaultBuckets()) { Clear(); } + +// Create a histogram with a custom set of bucket limits, +// specified in "custom_buckets[0..custom_buckets.size()-1]" +Histogram::Histogram(gtl::ArraySlice<double> custom_bucket_limits) + : custom_bucket_limits_(custom_bucket_limits.begin(), + custom_bucket_limits.end()), + bucket_limits_(custom_bucket_limits_) { +#ifndef NDEBUG + DCHECK_GT(bucket_limits_.size(), 0); + // Verify that the bucket boundaries are strictly increasing + for (size_t i = 1; i < bucket_limits_.size(); i++) { + DCHECK_GT(bucket_limits_[i], bucket_limits_[i - 1]); + } +#endif + Clear(); +} + +bool Histogram::DecodeFromProto(const HistogramProto& proto) { + if ((proto.bucket_size() != proto.bucket_limit_size()) || + (proto.bucket_size() == 0)) { + return false; + } + min_ = proto.min(); + max_ = proto.max(); + num_ = proto.num(); + sum_ = proto.sum(); + sum_squares_ = proto.sum_squares(); + custom_bucket_limits_.clear(); + custom_bucket_limits_.insert(custom_bucket_limits_.end(), + proto.bucket_limit().begin(), + proto.bucket_limit().end()); + bucket_limits_ = custom_bucket_limits_; + buckets_.clear(); + buckets_.insert(buckets_.end(), proto.bucket().begin(), proto.bucket().end()); + return true; +} + +void Histogram::Clear() { + min_ = bucket_limits_[bucket_limits_.size() - 1]; + max_ = -DBL_MAX; + num_ = 0; + sum_ = 0; + sum_squares_ = 0; + buckets_.resize(bucket_limits_.size()); + for (size_t i = 0; i < bucket_limits_.size(); i++) { + buckets_[i] = 0; + } +} + +void Histogram::Add(double value) { + int b = + std::upper_bound(bucket_limits_.begin(), bucket_limits_.end(), value) - + bucket_limits_.begin(); + + buckets_[b] += 1.0; + if (min_ > value) min_ = value; + if (max_ < value) max_ = value; + num_++; + sum_ += value; + sum_squares_ += (value * value); +} + +double Histogram::Median() const { return Percentile(50.0); } + +double Histogram::Percentile(double p) const { + if (num_ == 0.0) return 0.0; + double threshold = num_ * (p / 100.0); + double sum = 0; + for (size_t b = 0; b < buckets_.size(); b++) { + sum += buckets_[b]; + if (sum >= threshold) { + // Scale linearly within this bucket + double left_point = (b == 0) ? min_ : bucket_limits_[b - 1]; + double right_point = bucket_limits_[b]; + double left_sum = sum - buckets_[b]; + double right_sum = sum; + double pos = (threshold - left_sum) / (right_sum - left_sum); + double r = left_point + (right_point - left_point) * pos; + if (r < min_) r = min_; + if (r > max_) r = max_; + return r; + } + } + return max_; +} + +double Histogram::Average() const { + if (num_ == 0.0) return 0; + return sum_ / num_; +} + +double Histogram::StandardDeviation() const { + if (num_ == 0.0) return 0; + double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_); + return sqrt(variance); +} + +std::string Histogram::ToString() const { + std::string r; + char buf[200]; + snprintf(buf, sizeof(buf), "Count: %.0f Average: %.4f StdDev: %.2f\n", num_, + Average(), StandardDeviation()); + r.append(buf); + snprintf(buf, sizeof(buf), "Min: %.4f Median: %.4f Max: %.4f\n", + (num_ == 0.0 ? 0.0 : min_), Median(), max_); + r.append(buf); + r.append("------------------------------------------------------\n"); + const double mult = num_ > 0 ? 100.0 / num_ : 0.0; + double sum = 0; + for (size_t b = 0; b < buckets_.size(); b++) { + if (buckets_[b] <= 0.0) continue; + sum += buckets_[b]; + snprintf(buf, sizeof(buf), "[ %10.2g, %10.2g ) %7.0f %7.3f%% %7.3f%% ", + ((b == 0) ? -DBL_MAX : bucket_limits_[b - 1]), // left + bucket_limits_[b], // right + buckets_[b], // count + mult * buckets_[b], // percentage + mult * sum); // cum percentage + r.append(buf); + + // Add hash marks based on percentage; 20 marks for 100%. + int marks = static_cast<int>(20 * (buckets_[b] / num_) + 0.5); + r.append(marks, '#'); + r.push_back('\n'); + } + return r; +} + +void Histogram::EncodeToProto(HistogramProto* proto, + bool preserve_zero_buckets) const { + proto->Clear(); + proto->set_min(min_); + proto->set_max(max_); + proto->set_num(num_); + proto->set_sum(sum_); + proto->set_sum_squares(sum_squares_); + for (size_t i = 0; i < buckets_.size();) { + double end = bucket_limits_[i]; + double count = buckets_[i]; + i++; + if (!preserve_zero_buckets && count <= 0.0) { + // Find run of empty buckets and collapse them into one + while (i < buckets_.size() && buckets_[i] <= 0.0) { + end = bucket_limits_[i]; + count = buckets_[i]; + i++; + } + } + proto->add_bucket_limit(end); + proto->add_bucket(count); + } + if (proto->bucket_size() == 0.0) { + // It's easier when we restore if we always have at least one bucket entry + proto->add_bucket_limit(DBL_MAX); + proto->add_bucket(0.0); + } +} + +// ThreadSafeHistogram implementation. +bool ThreadSafeHistogram::DecodeFromProto(const HistogramProto& proto) { + mutex_lock l(mu_); + return histogram_.DecodeFromProto(proto); +} + +void ThreadSafeHistogram::Clear() { + mutex_lock l(mu_); + histogram_.Clear(); +} + +void ThreadSafeHistogram::Add(double value) { + mutex_lock l(mu_); + histogram_.Add(value); +} + +void ThreadSafeHistogram::EncodeToProto(HistogramProto* proto, + bool preserve_zero_buckets) const { + mutex_lock l(mu_); + histogram_.EncodeToProto(proto, preserve_zero_buckets); +} + +double ThreadSafeHistogram::Median() const { + mutex_lock l(mu_); + return histogram_.Median(); +} + +double ThreadSafeHistogram::Percentile(double p) const { + mutex_lock l(mu_); + return histogram_.Percentile(p); +} + +double ThreadSafeHistogram::Average() const { + mutex_lock l(mu_); + return histogram_.Average(); +} + +double ThreadSafeHistogram::StandardDeviation() const { + mutex_lock l(mu_); + return histogram_.StandardDeviation(); +} + +std::string ThreadSafeHistogram::ToString() const { + mutex_lock l(mu_); + return histogram_.ToString(); +} + +} // namespace histogram +} // namespace tensorflow diff --git a/tensorflow/core/lib/histogram/histogram.h b/tensorflow/core/lib/histogram/histogram.h new file mode 100644 index 0000000000..9b655f3acb --- /dev/null +++ b/tensorflow/core/lib/histogram/histogram.h @@ -0,0 +1,119 @@ +#ifndef TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_ +#define TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_ + +#include <string> +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/platform/thread_annotations.h" + +namespace tensorflow { + +class HistogramProto; + +namespace histogram { + +class Histogram { + public: + // Create a histogram with a default set of bucket boundaries. + // Buckets near zero cover very small ranges (e.g. 10^-12), and each + // bucket range grows by ~10% as we head away from zero. The + // buckets cover the range from -DBL_MAX to DBL_MAX. + Histogram(); + + // Create a histogram with a custom set of bucket boundaries, + // specified in "custom_bucket_limits[0..custom_bucket_limits.size()-1]" + // REQUIRES: custom_bucket_limits[i] values are monotonically increasing. + // REQUIRES: custom_bucket_limits is not empty() + explicit Histogram(gtl::ArraySlice<double> custom_bucket_limits); + + // Restore the state of a histogram that was previously encoded + // via Histogram::EncodeToProto. Note that only the bucket boundaries + // generated by EncodeToProto will be restored. + bool DecodeFromProto(const HistogramProto& proto); + + ~Histogram() {} + + void Clear(); + void Add(double value); + + // Save the current state of the histogram to "*proto". If + // "preserve_zero_buckets" is false, only non-zero bucket values and + // ranges are saved, and the bucket boundaries of zero-valued buckets + // are lost. + void EncodeToProto(HistogramProto* proto, bool preserve_zero_buckets) const; + + // Return the median of the values in the histogram + double Median() const; + + // Return the "p"th percentile [0.0..100.0] of the values in the + // distribution + double Percentile(double p) const; + + // Return the average value of the distribution + double Average() const; + + // Return the standard deviation of values in the distribution + double StandardDeviation() const; + + // Returns a multi-line human-readable string representing the histogram + // contents. Example output: + // Count: 4 Average: 251.7475 StdDev: 432.02 + // Min: -3.0000 Median: 5.0000 Max: 1000.0000 + // ------------------------------------------------------ + // [ -5, 0 ) 1 25.000% 25.000% ##### + // [ 0, 5 ) 1 25.000% 50.000% ##### + // [ 5, 10 ) 1 25.000% 75.000% ##### + // [ 1000, 10000 ) 1 25.000% 100.000% ##### + std::string ToString() const; + + private: + double min_; + double max_; + double num_; + double sum_; + double sum_squares_; + + std::vector<double> custom_bucket_limits_; + gtl::ArraySlice<double> bucket_limits_; + std::vector<double> buckets_; + + TF_DISALLOW_COPY_AND_ASSIGN(Histogram); +}; + +// Wrapper around a Histogram object that is thread safe. +// +// All methods hold a lock while delegating to a Histogram object owned by the +// ThreadSafeHistogram instance. +// +// See Histogram for documentation of the methods. +class ThreadSafeHistogram { + public: + ThreadSafeHistogram() {} + explicit ThreadSafeHistogram(gtl::ArraySlice<double> custom_bucket_limits) + : histogram_(custom_bucket_limits) {} + bool DecodeFromProto(const HistogramProto& proto); + + ~ThreadSafeHistogram() {} + + void Clear(); + + // TODO(mdevin): It might be a good idea to provide a AddN(<many values>) + // method to avoid grabbing/releasing the lock when adding many values. + void Add(double value); + + void EncodeToProto(HistogramProto* proto, bool preserve_zero_buckets) const; + double Median() const; + double Percentile(double p) const; + double Average() const; + double StandardDeviation() const; + std::string ToString() const; + + private: + mutable mutex mu_; + Histogram histogram_ GUARDED_BY(mu_); +}; + +} // namespace histogram +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_ diff --git a/tensorflow/core/lib/histogram/histogram_test.cc b/tensorflow/core/lib/histogram/histogram_test.cc new file mode 100644 index 0000000000..ede44fe85b --- /dev/null +++ b/tensorflow/core/lib/histogram/histogram_test.cc @@ -0,0 +1,112 @@ +#include "tensorflow/core/lib/histogram/histogram.h" +#include <float.h> +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/framework/summary.pb.h" +#include <gtest/gtest.h> + +namespace tensorflow { +namespace histogram { + +static void Validate(const Histogram& h) { + string s1 = h.ToString(); + LOG(ERROR) << s1; + + HistogramProto proto_with_zeroes; + h.EncodeToProto(&proto_with_zeroes, true); + Histogram h2; + EXPECT_TRUE(h2.DecodeFromProto(proto_with_zeroes)); + string s2 = h2.ToString(); + LOG(ERROR) << s2; + + EXPECT_EQ(s1, s2); + + HistogramProto proto_no_zeroes; + h.EncodeToProto(&proto_no_zeroes, false); + LOG(ERROR) << proto_no_zeroes.DebugString(); + Histogram h3; + EXPECT_TRUE(h3.DecodeFromProto(proto_no_zeroes)); + string s3 = h3.ToString(); + LOG(ERROR) << s3; + + EXPECT_EQ(s1, s3); +} + +TEST(Histogram, Empty) { + Histogram h; + Validate(h); +} + +TEST(Histogram, SingleValue) { + Histogram h; + h.Add(-3.0); + Validate(h); +} + +TEST(Histogram, CustomBuckets) { + Histogram h({-10, -5, 0, 5, 10, 100, 1000, 10000, DBL_MAX}); + h.Add(-3.0); + h.Add(4.99); + h.Add(5.0); + h.Add(1000.0); + Validate(h); +} + +TEST(Histogram, Percentile) { + Histogram h({0, 10, 100, DBL_MAX}); + h.Add(-2); + h.Add(-2); + h.Add(0); + double median = h.Percentile(50.0); + EXPECT_EQ(median, -0.5); +} + +TEST(Histogram, Basic) { + Histogram h; + for (int i = 0; i < 100; i++) { + h.Add(i); + } + for (int i = 1000; i < 100000; i += 1000) { + h.Add(i); + } + Validate(h); +} + +TEST(ThreadSafeHistogram, Basic) { + // Fill a normal histogram. + Histogram h; + for (int i = 0; i < 100; i++) { + h.Add(i); + } + + // Fill a thread-safe histogram with the same values. + ThreadSafeHistogram tsh; + for (int i = 0; i < 100; i++) { + tsh.Add(i); + } + + for (int i = 0; i < 2; ++i) { + bool preserve_zero_buckets = (i == 0); + HistogramProto h_proto; + h.EncodeToProto(&h_proto, preserve_zero_buckets); + HistogramProto tsh_proto; + tsh.EncodeToProto(&tsh_proto, preserve_zero_buckets); + + // Let's decode from the proto of the other histogram type. + Histogram h2; + EXPECT_TRUE(h2.DecodeFromProto(tsh_proto)); + ThreadSafeHistogram tsh2; + EXPECT_TRUE(tsh2.DecodeFromProto(h_proto)); + + // Now let's reencode and check they match. + EXPECT_EQ(h2.ToString(), tsh2.ToString()); + } + + EXPECT_EQ(h.Median(), tsh.Median()); + EXPECT_EQ(h.Percentile(40.0), tsh.Percentile(40.0)); + EXPECT_EQ(h.Average(), tsh.Average()); + EXPECT_EQ(h.StandardDeviation(), tsh.StandardDeviation()); + EXPECT_EQ(h.ToString(), tsh.ToString()); +} + +} // namespace histogram +} // namespace tensorflow |