From 9043f935344ae95bd397947d0c385ccef8ac7037 Mon Sep 17 00:00:00 2001 From: Dan Smilkov Date: Tue, 5 Apr 2016 07:08:40 -0800 Subject: Adding stats (run metadata) to the Graph UI. Todos: - Add actual memory and cpu data to the info card. Change: 119050200 --- .../components/tf-graph-app/tf-graph-app.html | 10 +- .../components/tf-graph-board/tf-graph-board.html | 6 +- .../components/tf-graph-common/lib/common.ts | 40 +++--- .../components/tf-graph-common/lib/graph.ts | 57 +++++---- .../components/tf-graph-common/lib/hierarchy.ts | 44 +++++-- .../components/tf-graph-common/lib/parser.ts | 141 ++++++++++----------- .../components/tf-graph-common/lib/render.ts | 26 ++-- .../components/tf-graph-common/test/parser-test.ts | 2 +- .../tf-graph-dashboard/tf-graph-dashboard.html | 47 +++---- .../tf-graph-loader/tf-graph-loader.html | 62 ++++----- .../components/tf-graph/demo/tf-graph-demo.html | 83 +++++++++--- .../components/tf-graph/tf-graph-controls.html | 78 ++++++++---- .../components/tf-graph/tf-graph-scene.html | 14 +- .../tensorboard/components/tf-graph/tf-graph.html | 13 +- 14 files changed, 372 insertions(+), 251 deletions(-) diff --git a/tensorflow/tensorboard/components/tf-graph-app/tf-graph-app.html b/tensorflow/tensorboard/components/tf-graph-app/tf-graph-app.html index a21202b834..a98a1e155c 100644 --- a/tensorflow/tensorboard/components/tf-graph-app/tf-graph-app.html +++ b/tensorflow/tensorboard/components/tf-graph-app/tf-graph-app.html @@ -45,14 +45,13 @@ Example
@@ -60,8 +59,7 @@ Example @@ -150,9 +150,7 @@ Polymer({ // Public API. graphHierarchy: Object, graph: Object, - graphName: String, - // True if the graph data has also run-time stats. - hasStats: Boolean, + stats: Object, /** * @type {value: number, msg: string} * diff --git a/tensorflow/tensorboard/components/tf-graph-common/lib/common.ts b/tensorflow/tensorboard/components/tf-graph-common/lib/common.ts index ab301a210e..18c35f1d96 100644 --- a/tensorflow/tensorboard/components/tf-graph-common/lib/common.ts +++ b/tensorflow/tensorboard/components/tf-graph-common/lib/common.ts @@ -239,31 +239,31 @@ export interface TFNode { /** * TensorFlow stats file definition as defined in the stats proto file. */ -export interface TFStats { - devStats: {device: string, nodeStats: TFNodeStats[]}[]; +export interface StepStats { + dev_stats: {device: string, node_stats: NodeStats[]}[]; } /** * TensorFlow stats for a node as defined in the stats proto file. */ -export interface TFNodeStats { - nodeName: string; +export interface NodeStats { + node_name: string; // The next 4 properties are currently stored as string in json // and must be parsed. - allStartMicros: number; - opStartRelMicros: number; - opEndRelMicros: number; - allEndRelMicros: number; + all_start_micros: number; + op_start_rel_micros: number; + op_end_rel_micros: number; + all_end_rel_micros: number; memory: { - allocatorName: string; - totalBytes: number; // Stored as string in json and should be parsed. - peakBytes: number; // Stored as string in json and should be parsed. + allocator_name: string; + total_bytes: number; // Stored as string in json and should be parsed. + peak_bytes: number; // Stored as string in json and should be parsed. }[]; /** Output sizes recorded for a single execution of a graph node */ output: TFNodeOutput[]; - timelineLabel: string; - scheduledMicros: string; - threadId: string; + timeline_label: string; + scheduled_micros: string; + thread_id: string; } /** @@ -271,9 +271,7 @@ export interface TFNodeStats { */ export interface TFNodeOutput { slot: number; // Stored as string in json and should be parsed. - /** Was the tensor allocated by this Op or a previous computation */ - allocationType: string; - tensorDescription: { + tensor_description: { /** Data type of tensor elements */ dtype: string; /** Shape of the tensor */ @@ -292,15 +290,15 @@ export interface TFNodeOutput { }[]; }; /** Information about the size and allocator used for the data */ - allocationDescription: { + allocation_description: { // The next 2 properties are stored as string in json and // should be parsed. /** Total number of bytes requested */ - requestedBytes: number; + requested_bytes: number; /** Total number of bytes allocated, if known */ - allocatedBytes?: number; + allocated_bytes?: number; /** Name of the allocator used */ - allocatorName: string; + allocator_name: string; }; }; } diff --git a/tensorflow/tensorboard/components/tf-graph-common/lib/graph.ts b/tensorflow/tensorboard/components/tf-graph-common/lib/graph.ts index b2f6d21598..c5266565bd 100644 --- a/tensorflow/tensorboard/components/tf-graph-common/lib/graph.ts +++ b/tensorflow/tensorboard/components/tf-graph-common/lib/graph.ts @@ -376,35 +376,40 @@ export function createMetanode(name: string, opt = {}): Metanode { * graph information. */ export function joinStatsInfoWithGraph(graph: SlimGraph, - statsJson: TFStats): void { - _.each(statsJson.devStats, stats => { - _.each(stats.nodeStats, nodeStats => { + stats: StepStats): void { + _.each(stats.dev_stats, devStats => { + _.each(devStats.node_stats, nodeStats => { // Lookup the node in the graph by its original name, e.g. A. If not // found, lookup by the rewritten name A/(A) in case the name is both // a namespace and a node name. - let nodeName = nodeStats.nodeName in graph.nodes ? - nodeStats.nodeName : - nodeStats.nodeName + NAMESPACE_DELIM + "(" + nodeStats.nodeName + ")"; - if (nodeName in graph.nodes) { - // Compute the total bytes used. - let totalBytes = 0; - if (nodeStats.memory) { - _.each(nodeStats.memory, alloc => { - if (alloc.totalBytes) { - totalBytes += Number(alloc.totalBytes); - } - }); - } - let outputSize: number[][] = null; - if (nodeStats.output) { - outputSize = _.map(nodeStats.output, output => { - return _.map(output.tensorDescription.shape.dim, - dim => Number(dim.size)); - }); - } - graph.nodes[nodeName].stats = new NodeStats(totalBytes, - Number(nodeStats.allEndRelMicros), outputSize); + let nodeName = nodeStats.node_name in graph.nodes ? + nodeStats.node_name : + nodeStats.node_name + NAMESPACE_DELIM + "(" + nodeStats.node_name + ")"; + + // Couldn't find a matching node. + if (!(nodeName in graph.nodes)) { + return; + } + + // Compute the total bytes used. + let totalBytes = 0; + if (nodeStats.memory) { + _.each(nodeStats.memory, alloc => { + if (alloc.total_bytes) { + totalBytes += Number(alloc.total_bytes); + } + }); + } + let outputSize: number[][] = null; + if (nodeStats.output) { + outputSize = _.map(nodeStats.output, output => { + return _.map(output.tensor_description.shape.dim, + dim => Number(dim.size)); + }); } + graph.nodes[nodeName].device = devStats.device; + graph.nodes[nodeName].stats = new NodeStats(totalBytes, + Number(nodeStats.all_end_rel_micros), outputSize); }); }); } @@ -492,7 +497,6 @@ class MetanodeImpl implements Metanode { this.templateId = null; /** Metanode which contains this node, if any */ this.parentNode = null; - this.stats = new NodeStats(0, 0, null); this.hasNonControlEdges = false; this.include = InclusionType.UNSPECIFIED; } @@ -705,7 +709,6 @@ class SeriesNodeImpl implements SeriesNode { this.parentNode = null; this.deviceHistogram = {}; this.hasNonControlEdges = false; - this.stats = new NodeStats(0, 0, null); this.include = InclusionType.UNSPECIFIED; } } diff --git a/tensorflow/tensorboard/components/tf-graph-common/lib/hierarchy.ts b/tensorflow/tensorboard/components/tf-graph-common/lib/hierarchy.ts index 8dca63c9ab..ef6d69d4d5 100644 --- a/tensorflow/tensorboard/components/tf-graph-common/lib/hierarchy.ts +++ b/tensorflow/tensorboard/components/tf-graph-common/lib/hierarchy.ts @@ -428,6 +428,42 @@ export function build(graph: tf.graph.SlimGraph, params: HierarchyParams, }); }; +export function joinAndAggregateStats(h: Hierarchy, stats: StepStats) { + // Get all the possible device names. + let deviceNames = {}; + _.each(h.root.leaves(), nodeName => { + let leaf = h.node(nodeName); + if (leaf.device != null) { + deviceNames[leaf.device] = true; + } + }); + h.devices = _.keys(deviceNames); + + // Reset stats for each group node. + _.each(h.getNodeMap(), (node, nodeName) => { + if (node.isGroupNode) { + node.stats = new NodeStats(0, 0, null); + (node).deviceHistogram = {}; + } + }); + + // Bubble-up the stats and device distribution from leaves to parents. + _.each(h.root.leaves(), nodeName => { + let leaf = h.node(nodeName); + let node = leaf; + while (node.parentNode != null) { + if (leaf.device != null) { + let deviceHistogram = (node.parentNode).deviceHistogram; + deviceHistogram[leaf.device] = (deviceHistogram[leaf.device] || 0) + 1; + } + if (leaf.stats != null) { + node.parentNode.stats.combine(leaf.stats); + } + node = node.parentNode; + } + }); +} + /** * Creates the metanodes in the hierarchical graph and assigns parent-child * relationship between them. @@ -446,9 +482,6 @@ function addNodes(h: Hierarchy, graph: SlimGraph) { parent.depth = Math.max(parent.depth, path.length - i); parent.cardinality += node.cardinality; parent.opHistogram[node.op] = (parent.opHistogram[node.op] || 0) + 1; - if (node.stats) { - parent.stats.combine(node.stats); - } if (node.device != null) { parent.deviceHistogram[node.device] = (parent.deviceHistogram[node.device] || 0) + 1; @@ -623,11 +656,6 @@ function groupSeries(metanode: Metanode, hierarchy: Hierarchy, } child.parentNode = seriesNode; seriesNames[n] = seriesName; - - if (child.stats) { - seriesNode.stats.combine(child.stats); - } - // Remove now-grouped node from its original parent's metagraph. metagraph.removeNode(n); }); diff --git a/tensorflow/tensorboard/components/tf-graph-common/lib/parser.ts b/tensorflow/tensorboard/components/tf-graph-common/lib/parser.ts index 0ed1bd1961..865b6e6761 100644 --- a/tensorflow/tensorboard/components/tf-graph-common/lib/parser.ts +++ b/tensorflow/tensorboard/components/tf-graph-common/lib/parser.ts @@ -37,7 +37,7 @@ function parseValue(value: string): string|number|boolean { /** * Fetches a text file and returns a promise of the result. */ -export function readPbTxt(filepath: string): Promise { +export function fetchPbTxt(filepath: string): Promise { return new Promise(function(resolve, reject) { d3.text(filepath, function(error, text) { if (error) { @@ -50,52 +50,36 @@ export function readPbTxt(filepath: string): Promise { } /** - * Fetches and parses a json file and returns a promise of the result. + * Fetches the metadata file, parses it and returns a promise of the result. */ -export function readJson(filepath: string): Promise { - return new Promise(function(resolve, reject) { - d3.json(filepath, function(error, text) { - if (error) { - reject(error); - return; - } - resolve(text); - }); +export function fetchAndParseMetadata(path: string, tracker: ProgressTracker) { + return runTask("Reading metadata pbtxt", 40, () => { + if (path == null) { + return Promise.resolve(null); + } + return fetchPbTxt(path).then(text => new Blob([text])); + }, tracker) + .then((blob: Blob) => { + return runTask("Parsing metadata.pbtxt", 60, () => { + return blob != null ? parseStatsPbTxt(blob) : null; + }, tracker); }); } /** - * Reads the graph and stats file (if available), parses them and returns a - * promise of the result. + * Fetches the graph file, parses it and returns a promise of the result. */ -export function readAndParseData(dataset: {path: string, statsPath: string}, - pbTxtFile: Blob, tracker: ProgressTracker): - Promise<{ nodes: TFNode[], statsJson: Object }|void> { - let graphPbTxt: Blob; - let statsJson: Object; - return runTask("Reading graph.pbtxt", 20, () => { +export function fetchAndParseGraphData(path: string, pbTxtFile: Blob, + tracker: ProgressTracker) { + return runTask("Reading graph pbtxt", 40, () => { return pbTxtFile ? Promise.resolve(pbTxtFile) : - readPbTxt(dataset.path).then(text => new Blob([text])); + fetchPbTxt(path).then(text => new Blob([text])); }, tracker) .then(blob => { - graphPbTxt = blob; - return runTask("Reading stats.pbtxt", 20, () => { - return (dataset != null && dataset.statsPath != null) ? - readJson(dataset.statsPath) : null; - }, tracker); - }) - .then(json => { - statsJson = json; return runTask("Parsing graph.pbtxt", 60, () => { - return parsePbtxtFile(graphPbTxt); + return parseGraphPbTxt(blob); }, tracker); - }) - .then(nodes => { - return { - nodes: nodes, - statsJson: statsJson - }; }); } @@ -158,13 +142,59 @@ export function streamParse(file: Blob, callback: (string) => void, } /** - * Parses a proto txt file or blob into javascript object. + * Since proto-txt doesn't explicitly say whether an attribute is repeated + * (an array) or not, we keep a hard-coded list of attributes that are known + * to be repeated. This list is used in parsing time to convert repeated + * attributes into arrays even when the attribute only shows up once in the + * object. + */ +const GRAPH_REPEATED_FIELDS: {[attrPath: string]: boolean} = { + "node": true, + "node.input": true, + "node.attr": true, + "node.attr.value.list.type": true, + "node.attr.value.shape.dim": true, + "node.attr.value.tensor.string_val": true, + "node.attr.value.tensor.tensor_shape.dim": true, + "node.attr.value.list.shape": true, + "node.attr.value.list.shape.dim": true, + "node.attr.value.list.s": true +}; + +const METADATA_REPEATED_FIELDS: {[attrPath: string]: boolean} = { + "step_stats.dev_stats": true, + "step_stats.dev_stats.node_stats": true, + "step_stats.dev_stats.node_stats.output": true, + "step_stats.dev_stats.node_stats.memory": true, + "step_stats.dev_stats.node_stats.output.tensor_description.shape.dim": true +}; + +/** + * Parses a blob of proto txt file into a raw Graph object. + */ +export function parseGraphPbTxt(input: Blob): Promise { + return parsePbtxtFile(input, GRAPH_REPEATED_FIELDS).then(obj => obj["node"]); +} + +/** + * Parses a blob of proto txt file into a StepStats object. + */ +function parseStatsPbTxt(input: Blob): Promise { + return parsePbtxtFile(input, METADATA_REPEATED_FIELDS) + .then(obj => obj["step_stats"]); +} + +/** + * Parses a blob of proto txt file into javascript object. * * @param input The Blob or file object implementing slice. + * @param repeatedFields Map (Set) of all the repeated fields, since you can't + * tell directly from the pbtxt if a field is repeated or not. * @returns The parsed object. */ -export function parsePbtxtFile(input: Blob): Promise { - let output: { [name: string]: any; } = { node: [] }; +function parsePbtxtFile(input: Blob, + repeatedFields: {[attrPath: string]: boolean}): Promise { + let output: { [name: string]: any; } = {}; let stack = []; let path: string[] = []; let current: { [name: string]: any; } = output; @@ -179,26 +209,6 @@ export function parsePbtxtFile(input: Blob): Promise { }; } - /** - * Since proto-txt doesn't explicitly say whether an attribute is repeated - * (an array) or not, we keep a hard-coded list of attributes that are known - * to be repeated. This list is used in parsing time to convert repeated - * attributes into arrays even when the attribute only shows up once in the - * object. - */ - let ARRAY_ATTRIBUTES: {[attrPath: string]: boolean} = { - "node": true, - "node.input": true, - "node.attr": true, - "node.attr.value.list.type": true, - "node.attr.value.shape.dim": true, - "node.attr.value.tensor.string_val": true, - "node.attr.value.tensor.tensor_shape.dim": true, - "node.attr.value.list.shape": true, - "node.attr.value.list.shape.dim": true, - "node.attr.value.list.s": true - }; - /** * Adds a value, given the attribute name and the host object. If the * attribute already exists, but is not an array, it will convert it to an @@ -215,7 +225,7 @@ export function parsePbtxtFile(input: Blob): Promise { // We treat "node" specially since it is done so often. let existingValue = obj[name]; if (existingValue == null) { - obj[name] = path.join(".") in ARRAY_ATTRIBUTES ? [value] : value; + obj[name] = path.join(".") in repeatedFields ? [value] : value; } else if (Array.isArray(existingValue)) { existingValue.push(value); } else { @@ -247,19 +257,8 @@ export function parsePbtxtFile(input: Blob): Promise { break; } }).then(function() { - return output["node"]; + return output; }); } -/** - * Parses a proto txt file into a javascript object. - * - * @param input The string contents of the proto txt file. - * @return The parsed object. - */ -export function parsePbtxt(input: string): Promise { - let blob = new Blob([input]); - return parsePbtxtFile(blob); -} - } // Close module tf.graph.parser. diff --git a/tensorflow/tensorboard/components/tf-graph-common/lib/render.ts b/tensorflow/tensorboard/components/tf-graph-common/lib/render.ts index 5110e2f3f7..4033219b62 100644 --- a/tensorflow/tensorboard/components/tf-graph-common/lib/render.ts +++ b/tensorflow/tensorboard/components/tf-graph-common/lib/render.ts @@ -167,12 +167,24 @@ export class RenderGraphInfo { constructor(hierarchy: hierarchy.Hierarchy) { this.hierarchy = hierarchy; this.index = {}; + + this.computeScales(); + // Maps node name to whether the rendering hierarchy was already + // constructed. + this.hasSubhierarchy = {}; + this.root = new RenderGroupNodeInfo(hierarchy.root); + this.index[hierarchy.root.name] = this.root; + this.buildSubhierarchy(hierarchy.root.name); + this.root.expanded = true; + } + + computeScales() { this.deviceColorMap = d3.scale.ordinal() - .domain(hierarchy.devices) - .range(_.map(d3.range(hierarchy.devices.length), + .domain(this.hierarchy.devices) + .range(_.map(d3.range(this.hierarchy.devices.length), MetanodeColors.DEVICE_PALETTE)); - let topLevelGraph = hierarchy.root.metagraph; + let topLevelGraph = this.hierarchy.root.metagraph; // Find the maximum and minimum memory usage. let memoryExtent = d3.extent(topLevelGraph.nodes(), (nodeName, index) => { @@ -198,14 +210,6 @@ export class RenderGraphInfo { this.computeTimeScale = d3.scale.linear() .domain(computeTimeExtent) .range(PARAMS.minMaxColors); - - // Maps node name to whether the rendering hierarchy was already - // constructed. - this.hasSubhierarchy = {}; - this.root = new RenderGroupNodeInfo(hierarchy.root); - this.index[hierarchy.root.name] = this.root; - this.buildSubhierarchy(hierarchy.root.name); - this.root.expanded = true; } /** diff --git a/tensorflow/tensorboard/components/tf-graph-common/test/parser-test.ts b/tensorflow/tensorboard/components/tf-graph-common/test/parser-test.ts index cc4c951f7d..7d510c57ae 100644 --- a/tensorflow/tensorboard/components/tf-graph-common/test/parser-test.ts +++ b/tensorflow/tensorboard/components/tf-graph-common/test/parser-test.ts @@ -32,7 +32,7 @@ test("simple pbtxt", (done) => { input: "Q" input: "W" }`; - tf.graph.parser.parsePbtxt(pbtxt).then(nodes => { + tf.graph.parser.parseGraphPbTxt(new Blob([pbtxt])).then(nodes => { assert.isTrue(nodes != null && nodes.length === 3); done(); }); diff --git a/tensorflow/tensorboard/components/tf-graph-dashboard/tf-graph-dashboard.html b/tensorflow/tensorboard/components/tf-graph-dashboard/tf-graph-dashboard.html index 616e701973..3fb87417e9 100644 --- a/tensorflow/tensorboard/components/tf-graph-dashboard/tf-graph-dashboard.html +++ b/tensorflow/tensorboard/components/tf-graph-dashboard/tf-graph-dashboard.html @@ -27,20 +27,21 @@ by default. The user can select a different run from a dropdown menu.