aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar William Chargin <wchargin@google.com>2017-06-13 09:59:10 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-06-13 10:02:44 -0700
commitdc65e091582841ed940aba442cf147e78e05f1f4 (patch)
tree569e5dde1270042242d2e01f21816cb3fc3835bb
parentfbd8bcd5f2cd97a7109f87281d95ad4b59ebe714 (diff)
Let /data/runs sort runs by their first events
Now that we've eliminated all actual requests to `/data/runs` on the client, we may freely change the route to return what it actually should: a simple array of run names, sorted by their `first_event_timestamp` so that the order is append-only with respect to newly discovered runs. PiperOrigin-RevId: 158859018
-rw-r--r--tensorflow/tensorboard/backend/application.py47
-rw-r--r--tensorflow/tensorboard/backend/application_test.py102
-rw-r--r--tensorflow/tensorboard/http_api.md40
3 files changed, 97 insertions, 92 deletions
diff --git a/tensorflow/tensorboard/backend/application.py b/tensorflow/tensorboard/backend/application.py
index 9c492e7dd3..3657eee38b 100644
--- a/tensorflow/tensorboard/backend/application.py
+++ b/tensorflow/tensorboard/backend/application.py
@@ -46,23 +46,6 @@ DEFAULT_SIZE_GUIDANCE = {
event_accumulator.HISTOGRAMS: 50,
}
-# The following types of data shouldn't be shown in the output of the
-# /data/runs route, because they're now handled by independent plugins
-# (e.g., the content at the 'scalars' key should now be fetched from
-# /data/plugin/scalars/runs).
-#
-# Once everything has been migrated, we should be able to delete
-# /data/runs entirely.
-_MIGRATED_DATA_KEYS = frozenset((
- 'audio',
- 'distributions',
- 'graph',
- 'histograms',
- 'images',
- 'run_metadata',
- 'scalars',
-))
-
DATA_PREFIX = '/data'
LOGDIR_ROUTE = '/logdir'
RUNS_ROUTE = '/runs'
@@ -149,10 +132,8 @@ class TensorBoardWSGIApp(object):
self._serve_logdir,
# TODO(chizeng): Delete this RPC once we have skylark rules that obviate
# the need for the frontend to determine which plugins are active.
- DATA_PREFIX + PLUGINS_LISTING_ROUTE:
- self._serve_plugins_listing,
- DATA_PREFIX + RUNS_ROUTE:
- self._serve_runs,
+ DATA_PREFIX + PLUGINS_LISTING_ROUTE: self._serve_plugins_listing,
+ DATA_PREFIX + RUNS_ROUTE: self._serve_runs,
}
# Serve the routes from the registered plugins using their name as the route
@@ -235,19 +216,25 @@ class TensorBoardWSGIApp(object):
A werkzeug Response with the following content:
{runName: {firstEventTimestamp: 123456.789}}
"""
- runs = self._multiplexer.Runs()
- for run_name, run_data in runs.items():
- for key in _MIGRATED_DATA_KEYS:
- if key in run_data:
- del run_data[key]
+ run_names = sorted(self._multiplexer.Runs()) # Why `sorted`? See below.
+ def get_first_event_timestamp(run_name):
try:
- run_data['firstEventTimestamp'] = self._multiplexer.FirstEventTimestamp(
- run_name)
+ return self._multiplexer.FirstEventTimestamp(run_name)
except ValueError:
tf.logging.warning('Unable to get first event timestamp for run %s',
run_name)
- run_data['firstEventTimestamp'] = None
- return http_util.Respond(request, runs, 'application/json')
+ # Put runs without a timestamp at the end. Their internal
+ # ordering would be nondeterministic, but Python's sorts are
+ # stable, so `sorted`ing the initial list above provides a
+ # deterministic ordering. Of course, we cannot guarantee that
+ # this will be append-only for new event-less runs.
+ return float('inf')
+ first_event_timestamps = {
+ run_name: get_first_event_timestamp(run_name)
+ for run_name in run_names
+ }
+ run_names.sort(key=first_event_timestamps.get)
+ return http_util.Respond(request, run_names, 'application/json')
@wrappers.Request.application
def _serve_index(self, request):
diff --git a/tensorflow/tensorboard/backend/application_test.py b/tensorflow/tensorboard/backend/application_test.py
index b4847fcb6c..fd63564e4e 100644
--- a/tensorflow/tensorboard/backend/application_test.py
+++ b/tensorflow/tensorboard/backend/application_test.py
@@ -23,7 +23,6 @@ from __future__ import print_function
import gzip
import json
-import numbers
import os
import shutil
import socket
@@ -83,8 +82,10 @@ class TensorboardServerTest(tf.test.TestCase):
_only_use_meta_graph = False # Server data contains only a GraphDef
def setUp(self):
- self.temp_dir = self._GenerateTestData()
- multiplexer = event_multiplexer.EventMultiplexer(
+ self.logdir = self.get_temp_dir()
+
+ self._GenerateTestData(run_name='run1')
+ self._multiplexer = event_multiplexer.EventMultiplexer(
size_guidance=application.DEFAULT_SIZE_GUIDANCE,
purge_orphaned_data=True)
plugins = [
@@ -92,7 +93,7 @@ class TensorboardServerTest(tf.test.TestCase):
FakePlugin(plugin_name='bar', is_active_value=False, routes_mapping={})
]
app = application.TensorBoardWSGIApp(
- self.temp_dir, plugins, multiplexer, reload_interval=0)
+ self.logdir, plugins, self._multiplexer, reload_interval=0)
try:
self._server = serving.BaseWSGIServer('localhost', 0, app)
# 0 to pick an unused port.
@@ -145,7 +146,7 @@ class TensorboardServerTest(tf.test.TestCase):
def testLogdir(self):
"""Test the format of the data/logdir endpoint."""
parsed_object = self._getJson('/data/logdir')
- self.assertEqual(parsed_object, {'logdir': self.temp_dir})
+ self.assertEqual(parsed_object, {'logdir': self.logdir})
def testPluginsListing(self):
"""Test the format of the data/plugins_listing endpoint."""
@@ -156,20 +157,61 @@ class TensorboardServerTest(tf.test.TestCase):
def testRuns(self):
"""Test the format of the /data/runs endpoint."""
run_json = self._getJson('/data/runs')
-
- # Don't check the actual timestamp since it's time-dependent.
- self.assertTrue(
- isinstance(run_json['run1']['firstEventTimestamp'], numbers.Number))
- del run_json['run1']['firstEventTimestamp']
- self.assertEqual(
- run_json,
- {
- 'run1': {
- # if only_use_meta_graph, the graph is from the metagraph
- 'meta_graph': self._only_use_meta_graph,
- 'tensors': [],
- }
- })
+ self.assertEqual(run_json, ['run1'])
+
+ def testRunsAppendOnly(self):
+ """Test that new runs appear after old ones in /data/runs."""
+ # We use three runs: the 'run1' that we already created in our
+ # `setUp` method, plus runs with names lexicographically before and
+ # after it (so that just sorting by name doesn't have a chance of
+ # working).
+ fake_wall_times = {
+ 'run1': 1234.0,
+ 'avocado': 2345.0,
+ 'zebra': 3456.0,
+ 'mysterious': None,
+ }
+
+ stubs = tf.test.StubOutForTesting()
+ # pylint: disable=invalid-name
+ def FirstEventTimestamp_stub(multiplexer_self, run_name):
+ del multiplexer_self
+ matches = [candidate_name
+ for candidate_name in fake_wall_times
+ if run_name.endswith(candidate_name)]
+ self.assertEqual(len(matches), 1, '%s (%s)' % (matches, run_name))
+ wall_time = fake_wall_times[matches[0]]
+ if wall_time is None:
+ raise ValueError('No event timestamp could be found')
+ else:
+ return wall_time
+ # pylint: enable=invalid-name
+
+ stubs.SmartSet(self._multiplexer,
+ 'FirstEventTimestamp',
+ FirstEventTimestamp_stub)
+
+ def add_run(run_name):
+ self._GenerateTestData(run_name)
+ self._multiplexer.AddRunsFromDirectory(self.logdir)
+ self._multiplexer.Reload()
+
+ # Add one run: it should come last.
+ add_run('avocado')
+ self.assertEqual(self._getJson('/data/runs'),
+ ['run1', 'avocado'])
+
+ # Add another run: it should come last, too.
+ add_run('zebra')
+ self.assertEqual(self._getJson('/data/runs'),
+ ['run1', 'avocado', 'zebra'])
+
+ # And maybe there's a run for which we somehow have no timestamp.
+ add_run('mysterious')
+ self.assertEqual(self._getJson('/data/runs'),
+ ['run1', 'avocado', 'zebra', 'mysterious'])
+
+ stubs.UnsetAll()
def testApplicationPaths_getCached(self):
"""Test the format of the /data/runs endpoint."""
@@ -197,20 +239,20 @@ class TensorboardServerTest(tf.test.TestCase):
response.read()
connection.close()
- def _GenerateTestData(self):
+ def _GenerateTestData(self, run_name):
"""Generates the test data directory.
- The test data has a single run named run1 which contains:
- - a graph definition and metagraph definition
+ The test data has a single run of the given name, containing:
+ - a graph definition and metagraph definition
- Returns:
- temp_dir: The directory the test data is generated under.
+ Arguments:
+ run_name: the directory under self.logdir into which to write
+ events
"""
- temp_dir = tempfile.mkdtemp(prefix=self.get_temp_dir())
- self.addCleanup(shutil.rmtree, temp_dir)
- run1_path = os.path.join(temp_dir, 'run1')
- os.makedirs(run1_path)
- writer = tf.summary.FileWriter(run1_path)
+ run_path = os.path.join(self.logdir, run_name)
+ os.makedirs(run_path)
+
+ writer = tf.summary.FileWriter(run_path)
# Add a simple graph event.
graph_def = tf.GraphDef()
@@ -230,8 +272,6 @@ class TensorboardServerTest(tf.test.TestCase):
writer.flush()
writer.close()
- return temp_dir
-
class TensorboardServerPluginNameTest(tf.test.TestCase):
diff --git a/tensorflow/tensorboard/http_api.md b/tensorflow/tensorboard/http_api.md
index c62de0376d..c2885daf93 100644
--- a/tensorflow/tensorboard/http_api.md
+++ b/tensorflow/tensorboard/http_api.md
@@ -45,40 +45,18 @@ requests to an inactive plugin - the routes of an inactive plugin do not work.
## `data/runs`
-> **NOTE:** TensorBoard is migrating away from this API. Over time, each tag
-> type will own a dedicated route, such as `data/plugin/scalars/tags`.
+Returns an array containing the names of all the runs known to the
+TensorBoard backend at this time. Each entry is a string corresponding
+to a single run.
-Returns a dictionary mapping from `run name` (quoted string) to dictionaries
-mapping from most available tagTypes (see "Migrations" below) to a list of tags
-of that type available for the run. Think of this as a comprehensive index of
-all of the data available from the TensorBoard server. Here is an example:
+We guarantee that as new runs are created in the log directory, they
+will always appear at the end of the list returned by this route. That
+is, the order of runs is persistent, and the result of this route is an
+&ldquo;append-only&rdquo; list.
- {
- "train_run": {
- "firstEventTimestamp": 123456.789
- },
- "eval": {
- }
- }
-
-The `firstEventTimestamp` value is in seconds since the epoch.
-
-Note that the same tag may be present for many runs. It is not guaranteed that
-they will have the same meaning across runs. It is also not guaranteed that they
-will have the same tag type across different runs.
-
-### Migrations
-
-Each of the following tag types `<t>` has been migrated to `/data/plugin/<t>/tags`,
-and will not appear in the output from this route:
+Example response:
- - `audio`
- - `images`
- - `scalars`
- - `compressedHistograms`, moved to `distributions`
- - `histograms`
- - `graph`, as `/data/plugin/graphs/runs`
- - `run_metadata`, as `/data/plugin/graphs/run_metadata_tags`
+ ["train_run", "eval"]
## `/data/plugin/scalars/tags`