Make TensorBoard histogram processing 3x faster

We discovered that on one of our datasets, the TensorBoard backend was spending 80% of its time processing histograms. Particularly costly was the histogram compression routine, which did a lot of math in pure Python on raw histograms; these are potentially quite large, considering that the C++ Histogram class uses a default bucket count of 774. These routines have now been optimized so that the _ProcessHistogram method of EventAccumulator and its subroutines take 39% of the time they did previously. This has been accomplished by doing the following:

- Use numpy fast vector math to compute the cumulative sum
- Use numpy to binary search for compression points in the cumulative sum
- Only convert histo.bucket to a list once rather than twice

Change: 132285757
author: Justine Tunney <jart@google.com> 2016-09-05 22:20:15 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-09-05 23:32:34 -0700
commit: 31dfded6b984ace134e183e24ca2bd9585420d0a (patch)
tree: 596db3f493a9909c9459409ecede285962708515 /tensorflow/python/summary/event_accumulator_test.py
parent: ecf5c9cd0de6e2466894c8737997c571de0aa26d (diff)
1 files changed, 32 insertions, 85 deletions
diff --git a/tensorflow/python/summary/event_accumulator_test.py b/tensorflow/python/summary/event_accumulator_test.py
index 3349e73f04..a8db2c50e9 100644
--- a/tensorflow/python/summary/event_accumulator_test.py
+++ b/tensorflow/python/summary/event_accumulator_test.py
@@ -307,91 +307,38 @@ class MockingEventAccumulatorTest(EventAccumulatorTest):
         compressed_histogram_values=expected_vals2)
     self.assertEqual(acc.CompressedHistograms('hst2'), [expected_cmphst2])
 
-  def testPercentile(self):
-
-    def AssertExpectedForBps(bps, expected):
-      output = acc._Percentile(bps, bucket_limit, cumsum_weights, histo_min,
-                               histo_max, histo_num)
-      self.assertAlmostEqual(expected, output)
-
-    gen = _EventGenerator()
-    acc = ea.EventAccumulator(gen)
-
-    bucket_limit = [1, 2, 3, 4]
-    histo_num = 100
-
-    ## All weights in the first bucket
-    cumsum_weights = [10000, 10000, 10000, 10000]
-    histo_min = -1
-    histo_max = .9
-    AssertExpectedForBps(0, histo_min)
-    AssertExpectedForBps(2500, ea._Remap(2500, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(5000, ea._Remap(5000, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(7500, ea._Remap(7500, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(10000, histo_max)
-
-    ## All weights in second bucket
-    cumsum_weights = [0, 10000, 10000, 10000]
-    histo_min = 1.1
-    histo_max = 1.8
-    AssertExpectedForBps(0, histo_min)
-    AssertExpectedForBps(2500, ea._Remap(2500, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(5000, ea._Remap(5000, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(7500, ea._Remap(7500, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(10000, histo_max)
-
-    ## All weights in the last bucket
-    cumsum_weights = [0, 0, 0, 10000]
-    histo_min = 3.1
-    histo_max = 3.6
-    AssertExpectedForBps(0, histo_min)
-    AssertExpectedForBps(2500, ea._Remap(2500, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(5000, ea._Remap(5000, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(7500, ea._Remap(7500, 0, 10000, histo_min, histo_max))
-    AssertExpectedForBps(10000, histo_max)
-
-    ## Weights distributed between two buckets
-    cumsum_weights = [0, 4000, 10000, 10000]
-    histo_min = 1.1
-    histo_max = 2.9
-    AssertExpectedForBps(0, histo_min)
-    AssertExpectedForBps(2500, ea._Remap(2500, 0, 4000, histo_min,
-                                         bucket_limit[1]))
-    AssertExpectedForBps(5000, ea._Remap(5000, 4000, 10000, bucket_limit[1],
-                                         histo_max))
-    AssertExpectedForBps(7500, ea._Remap(7500, 4000, 10000, bucket_limit[1],
-                                         histo_max))
-    AssertExpectedForBps(10000, histo_max)
-
-    ## Weights distributed between all buckets
-    cumsum_weights = [1000, 4000, 8000, 10000]
-    histo_min = -1
-    histo_max = 3.9
-    AssertExpectedForBps(0, histo_min)
-    AssertExpectedForBps(2500, ea._Remap(2500, 1000, 4000, bucket_limit[0],
-                                         bucket_limit[1]))
-    AssertExpectedForBps(5000, ea._Remap(5000, 4000, 8000, bucket_limit[1],
-                                         bucket_limit[2]))
-    AssertExpectedForBps(7500, ea._Remap(7500, 4000, 8000, bucket_limit[1],
-                                         bucket_limit[2]))
-    AssertExpectedForBps(9000, ea._Remap(9000, 8000, 10000, bucket_limit[2],
-                                         histo_max))
-    AssertExpectedForBps(10000, histo_max)
-
-    ## Most weight in first bucket
-    cumsum_weights = [9000, 10000, 10000, 10000]
-    histo_min = -1
-    histo_max = 1.1
-    AssertExpectedForBps(0, histo_min)
-    AssertExpectedForBps(2500, ea._Remap(2500, 0, 9000, histo_min,
-                                         bucket_limit[0]))
-    AssertExpectedForBps(5000, ea._Remap(5000, 0, 9000, histo_min,
-                                         bucket_limit[0]))
-    AssertExpectedForBps(7500, ea._Remap(7500, 0, 9000, histo_min,
-                                         bucket_limit[0]))
-    AssertExpectedForBps(9500, ea._Remap(9500, 9000, 10000, bucket_limit[0],
-                                         histo_max))
-    AssertExpectedForBps(10000, histo_max)
+  def testCompressHistogram_uglyHistogram(self):
+    bps = (0, 668, 1587, 3085, 5000, 6915, 8413, 9332, 10000)
+    vals = ea._CompressHistogram(
+        ea.HistogramValue(
+            min=0.0,
+            max=1.0,
+            num=960.0,
+            sum=64.0,
+            sum_squares=64.0,
+            bucket_limit=[
+                0.0,
+                1e-12,
+                0.917246389039776,
+                1.0089710279437536,
+                1.7976931348623157e+308],
+            bucket=[
+                0.0,
+                896.0,
+                0.0,
+                64.0,
+                0.0]),
+        bps)
+    self.assertEquals(tuple(v.basis_point for v in vals), bps)
+    self.assertAlmostEqual(vals[0].value, 0.0)
+    self.assertAlmostEqual(vals[1].value, 7.157142857142856e-14)
+    self.assertAlmostEqual(vals[2].value, 1.7003571428571426e-13)
+    self.assertAlmostEqual(vals[3].value, 3.305357142857143e-13)
+    self.assertAlmostEqual(vals[4].value, 5.357142857142857e-13)
+    self.assertAlmostEqual(vals[5].value, 7.408928571428571e-13)
+    self.assertAlmostEqual(vals[6].value, 9.013928571428571e-13)
+    self.assertAlmostEqual(vals[7].value, 9.998571428571429e-13)
+    self.assertAlmostEqual(vals[8].value, 1.0)
 
   def testImages(self):
     gen = _EventGenerator()
author	Justine Tunney <jart@google.com>	2016-09-05 22:20:15 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-09-05 23:32:34 -0700
commit	31dfded6b984ace134e183e24ca2bd9585420d0a (patch)
tree	596db3f493a9909c9459409ecede285962708515 /tensorflow/python/summary/event_accumulator_test.py
parent	ecf5c9cd0de6e2466894c8737997c571de0aa26d (diff)