diff options
author | 2016-11-08 12:33:51 -0800 | |
---|---|---|
committer | 2016-11-08 16:28:34 -0800 | |
commit | c34e3c87ac0ec709fa19605a9d711462d6a9e9d6 (patch) | |
tree | d15fbb88702fa58e82940ade5fd38b20c73327a0 | |
parent | e4e580367efb8ac7c6ea1974c2ef5725f139b4cb (diff) |
Serve tensors from server as bytes to avoid the ~300MB browser string limit.
Also store data points as Float32Array[] instead of number[][], i.e. each data point is a Float32Array rather than a number[]. This cuts memory usage in half.
Change: 138550170
10 files changed, 91 insertions, 57 deletions
diff --git a/tensorflow/tensorboard/backend/server_test.py b/tensorflow/tensorboard/backend/server_test.py index 714796644e..c3088491a9 100644 --- a/tensorflow/tensorboard/backend/server_test.py +++ b/tensorflow/tensorboard/backend/server_test.py @@ -29,8 +29,8 @@ import os import shutil import tempfile import threading -import zlib +import numpy as np from six import BytesIO from six.moves import http_client from six.moves import xrange # pylint: disable=redefined-builtin @@ -264,9 +264,11 @@ class TensorboardServerTest(tf.test.TestCase): if 'projector' not in REGISTERED_PLUGINS: return - tensor_tsv = (self._get('/data/plugin/projector/tensor?run=run1&name=var1') - .read()) - self.assertEqual(tensor_tsv, b'6.0\t6.0') + url = '/data/plugin/projector/tensor?run=run1&name=var1' + tensor_bytes = self._get(url).read() + tensor = np.reshape(np.fromstring(tensor_bytes, dtype='float32'), [1, 2]) + expected_tensor = np.array([[6, 6]], dtype='float32') + self.assertTrue(np.array_equal(tensor, expected_tensor)) def testAcceptGzip_compressesResponse(self): response = self._get('/data/graph?run=run1&limit_attr_size=1024' diff --git a/tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts b/tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts index 656173f265..cbb8b412ea 100644 --- a/tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts +++ b/tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts @@ -79,13 +79,9 @@ export class ProtoDataProvider implements DataProvider { throw 'The shape doesn\'t match the length of the flattened array'; } for (let i = 0; i < n; i++) { - let vector: number[] = []; let offset = i * d; - for (let j = 0; j < d; j++) { - vector.push(tensor[offset++]); - } points.push({ - vector: vector, + vector: new Float32Array(tensor.slice(offset, offset + d)), metadata: {}, projections: null, index: i diff --git a/tensorflow/tensorboard/components/vz_projector/data-provider-server.ts 
b/tensorflow/tensorboard/components/vz_projector/data-provider-server.ts index f663388eb7..a617a981d9 100644 --- a/tensorflow/tensorboard/components/vz_projector/data-provider-server.ts +++ b/tensorflow/tensorboard/components/vz_projector/data-provider-server.ts @@ -82,17 +82,30 @@ export class ServerDataProvider implements DataProvider { callback: (ds: DataSet) => void) { // Get the tensor. logging.setModalMessage('Fetching tensor values...', TENSORS_MSG_ID); - d3.text( - `${this.routePrefix}/tensor?run=${run}&name=${tensorName}`, - (err: any, tsv: string) => { - if (err) { - logging.setModalMessage('Error: ' + err.responseText); - return; - } - dataProvider.parseTensors(tsv).then(dataPoints => { - callback(new DataSet(dataPoints)); - }); + let xhr = new XMLHttpRequest(); + xhr.open('GET', `${this.routePrefix}/tensor?run=${run}&name=${tensorName}`); + xhr.responseType = 'arraybuffer'; + xhr.onprogress = (ev) => { + if (ev.lengthComputable) { + let percent = (ev.loaded * 100 / ev.total).toFixed(1); + logging.setModalMessage('Fetching tensor values: ' + percent + '%', + TENSORS_MSG_ID); + } + }; + xhr.onload = () => { + let data = new Float32Array(xhr.response); + this.getEmbeddingInfo(run, tensorName, embedding => { + let dim = embedding.tensorShape[1]; + dataProvider.parseTensorsFromFloat32Array(data, dim).then( + dataPoints => { + callback(new DataSet(dataPoints)); }); + }); + }; + xhr.onerror = () => { + logging.setModalMessage('Error: ' + xhr.responseText); + }; + xhr.send(null); } retrieveSpriteAndMetadata(run: string, tensorName: string, diff --git a/tensorflow/tensorboard/components/vz_projector/data-provider.ts b/tensorflow/tensorboard/components/vz_projector/data-provider.ts index 4f74a44e8b..c774c50aa4 100644 --- a/tensorflow/tensorboard/components/vz_projector/data-provider.ts +++ b/tensorflow/tensorboard/components/vz_projector/data-provider.ts @@ -122,9 +122,9 @@ export function parseTensors( // If the first label is not a number, take it as the label. 
if (isNaN(row[0] as any) || numDim === row.length - 1) { dataPoint.metadata['label'] = row[0]; - dataPoint.vector = row.slice(1).map(Number); + dataPoint.vector = new Float32Array(row.slice(1).map(Number)); } else { - dataPoint.vector = row.map(Number); + dataPoint.vector = new Float32Array(row.map(Number)); } data.push(dataPoint); if (numDim == null) { @@ -148,6 +148,29 @@ export function parseTensors( }); } +/** Parses a tsv text file. */ +export function parseTensorsFromFloat32Array(data: Float32Array, + dim: number): Promise<DataPoint[]> { + return runAsyncTask('Parsing tensors...', () => { + let N = data.length / dim; + let dataPoints: DataPoint[] = []; + let offset = 0; + for (let i = 0; i < N; ++i) { + dataPoints.push({ + metadata: {}, + vector: data.subarray(offset, offset + dim), + index: i, + projections: null, + }); + offset += dim; + } + return dataPoints; + }, TENSORS_MSG_ID).then(dataPoints => { + logging.setModalMessage(null, TENSORS_MSG_ID); + return dataPoints; + }); +} + export function analyzeMetadata( columnNames, pointsMetadata: PointMetadata[]): ColumnStats[] { let columnStats: ColumnStats[] = columnNames.map(name => { diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts index 0f5cf31dca..f7766ac623 100644 --- a/tensorflow/tensorboard/components/vz_projector/data.ts +++ b/tensorflow/tensorboard/components/vz_projector/data.ts @@ -60,7 +60,7 @@ export interface SpriteAndMetadataInfo { export interface DataPoint extends scatterPlot.DataPoint { /** The point in the original space. */ - vector: number[]; + vector: Float32Array; /* * Metadata for each point. 
Each metadata is a set of key/value pairs diff --git a/tensorflow/tensorboard/components/vz_projector/data_test.ts b/tensorflow/tensorboard/components/vz_projector/data_test.ts index 34795b33e2..6d92287637 100644 --- a/tensorflow/tensorboard/components/vz_projector/data_test.ts +++ b/tensorflow/tensorboard/components/vz_projector/data_test.ts @@ -28,7 +28,7 @@ function makePointsWithTraces(traces: number[]) { let metadata: {[key: string]: any} = {}; metadata[nextAttr] = t >= 0 ? t : null; points.push({ - vector: [], + vector: new Float32Array(0), metadata: metadata, projections: {}, index: i diff --git a/tensorflow/tensorboard/components/vz_projector/knn.ts b/tensorflow/tensorboard/components/vz_projector/knn.ts index 4db45d207b..906e077b5d 100644 --- a/tensorflow/tensorboard/components/vz_projector/knn.ts +++ b/tensorflow/tensorboard/components/vz_projector/knn.ts @@ -16,6 +16,7 @@ limitations under the License. import {runAsyncTask} from './util'; import * as logging from './logging'; import {KMin} from './heap'; +import {Vector} from './vector'; import * as vector from './vector'; export type NearestEntry = { @@ -46,7 +47,7 @@ const KNN_GPU_MSG_ID = 'knn-gpu'; */ export function findKNNGPUCosine<T>( dataPoints: T[], k: number, - accessor: (dataPoint: T) => number[]): Promise<NearestEntry[][]> { + accessor: (dataPoint: T) => Float32Array): Promise<NearestEntry[][]> { let N = dataPoints.length; let dim = accessor(dataPoints[0]).length; @@ -139,8 +140,8 @@ export function findKNNGPUCosine<T>( * distance is above the limit. 
*/ export function findKNN<T>( - dataPoints: T[], k: number, accessor: (dataPoint: T) => number[], - dist: (a: number[], b: number[], limit: number) => + dataPoints: T[], k: number, accessor: (dataPoint: T) => Float32Array, + dist: (a: Vector, b: Vector, limit: number) => number): Promise<NearestEntry[][]> { return runAsyncTask<NearestEntry[][]>('Finding nearest neighbors...', () => { let N = dataPoints.length; @@ -218,8 +219,8 @@ function minDist( */ export function findKNNofPoint<T>( dataPoints: T[], pointIndex: number, k: number, - accessor: (dataPoint: T) => number[], - distance: (a: number[], b: number[]) => number) { + accessor: (dataPoint: T) => Float32Array, + distance: (a: Vector, b: Vector) => number) { let kMin = new KMin<NearestEntry>(k); let a = accessor(dataPoints[pointIndex]); for (let i = 0; i < dataPoints.length; ++i) { diff --git a/tensorflow/tensorboard/components/vz_projector/vector.ts b/tensorflow/tensorboard/components/vz_projector/vector.ts index 7f7d9f30f7..fd092bc4d1 100644 --- a/tensorflow/tensorboard/components/vz_projector/vector.ts +++ b/tensorflow/tensorboard/components/vz_projector/vector.ts @@ -19,7 +19,7 @@ import {assert} from './util'; * @fileoverview Useful vector utilities. */ -export type Vector = number[]; +export type Vector = Float32Array | number[]; export type Point2D = [number, number]; export type Point3D = [number, number, number]; @@ -43,9 +43,9 @@ export function sum(a: Vector): number { } /** Returns the sum of two vectors, i.e. a + b */ -export function add(a: Vector, b: Vector): Vector { +export function add(a: Vector, b: Vector): Float32Array { assert(a.length === b.length, 'Vectors a and b must be of same length'); - let result = new Array(a.length); + let result = new Float32Array(a.length); for (let i = 0; i < a.length; ++i) { result[i] = a[i] + b[i]; } @@ -53,9 +53,9 @@ export function add(a: Vector, b: Vector): Vector { } /** Subtracts vector b from vector a, i.e. 
returns a - b */ -export function sub(a: Vector, b: Vector): Vector { +export function sub(a: Vector, b: Vector): Float32Array { assert(a.length === b.length, 'Vectors a and b must be of same length'); - let result = new Array(a.length); + let result = new Float32Array(a.length); for (let i = 0; i < a.length; ++i) { result[i] = a[i] - b[i]; } @@ -151,12 +151,13 @@ export function unit(a: Vector): void { * @param vectors Array of vectors to be projected. * @param newDim The resulting dimension of the vectors. */ -export function projectRandom(vectors: number[][], newDim: number): number[][] { +export function projectRandom(vectors: Float32Array[], newDim: number): + Float32Array[] { let dim = vectors[0].length; let N = vectors.length; - let newVectors: number[][] = new Array(N); + let newVectors: Float32Array[] = new Array(N); for (let i = 0; i < N; ++i) { - newVectors[i] = new Array(newDim); + newVectors[i] = new Float32Array(newDim); } // Make nDim projections. for (let k = 0; k < newDim; ++k) { @@ -175,15 +176,6 @@ export function project2d(a: Vector, dir1: Vector, dir2: Vector): Point2D { return [dot(a, dir1), dot(a, dir2)]; } -/** Returns a vector filled with zeros */ -export function zeros(length: number): Vector { - let result = new Array(length); - for (let i = 0; i < length; ++i) { - result[i] = 0; - } - return result; -} - /** * Computes the centroid of the data points. If the provided data points are not * vectors, an accessor function needs to be provided. 
@@ -197,7 +189,7 @@ export function centroid<T>(dataPoints: T[], accessor?: (a: T) => Vector): accessor = (a: T) => <any>a; } assert(dataPoints.length >= 0, '`vectors` must be of length >= 1'); - let centroid = zeros(accessor(dataPoints[0]).length); + let centroid = new Float32Array(accessor(dataPoints[0]).length); for (let i = 0; i < dataPoints.length; ++i) { let dataPoint = dataPoints[i]; let vector = accessor(dataPoint); @@ -215,9 +207,9 @@ export function centroid<T>(dataPoints: T[], accessor?: (a: T) => Vector): * Generates a vector of the specified size where each component is drawn from * a random (0, 1) gaussian distribution. */ -export function rn(size: number): Vector { +export function rn(size: number): Float32Array { let normal = d3.random.normal(); - let result = new Array(size); + let result = new Float32Array(size); for (let i = 0; i < size; ++i) { result[i] = normal(); } @@ -249,7 +241,7 @@ export function cosSim(a: Vector, b: Vector): number { * typed array with row-first order. */ export function toTypedArray<T>( - dataPoints: T[], accessor: (dataPoint: T) => number[]): Float32Array { + dataPoints: T[], accessor: (dataPoint: T) => Float32Array): Float32Array { let N = dataPoints.length; let dim = accessor(dataPoints[0]).length; let result = new Float32Array(N * dim); diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts index 7b28b94060..6a26c985e8 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ import {DataSet, SpriteAndMetadataInfo, PCA_SAMPLE_DIM, Projection, SAMPLE_SIZE, State} from './data'; +import {Vector} from './vector'; import * as vector from './vector'; import {Projector} from './vz-projector'; import {ProjectorInput} from './vz-projector-input'; @@ -44,12 +45,16 @@ export let ProjectionsPanelPolymer = PolymerElement({ type InputControlName = 'xLeft' | 'xRight' | 'yUp' | 'yDown'; type CentroidResult = { - centroid?: number[]; numMatches?: number; + centroid?: Vector; + numMatches?: number; }; type Centroids = { - [key: string]: number[]; xLeft: number[]; xRight: number[]; yUp: number[]; - yDown: number[]; + [key: string]: Vector; + xLeft: Vector; + xRight: Vector; + yUp: Vector; + yDown: Vector; }; /** diff --git a/tensorflow/tensorboard/plugins/projector/plugin.py b/tensorflow/tensorboard/plugins/projector/plugin.py index f6dd2bcad7..12619637ce 100644 --- a/tensorflow/tensorboard/plugins/projector/plugin.py +++ b/tensorflow/tensorboard/plugins/projector/plugin.py @@ -20,6 +20,7 @@ from __future__ import print_function import imghdr import os +import numpy as np from google.protobuf import json_format from google.protobuf import text_format @@ -57,8 +58,8 @@ def _read_tensor_file(fpath): tensor = [] for line in f: if line: - tensor.append(line.rstrip('\n').split('\t')) - return tensor + tensor.append(map(float, line.rstrip('\n').split('\t'))) + return np.array(tensor, dtype='float32') def _latest_checkpoints_changed(configs, run_path_pairs): @@ -341,9 +342,10 @@ class ProjectorPlugin(TBPlugin): # Sample the tensor tensor = tensor[:LIMIT_NUM_POINTS] - # Stream it as TSV. 
- tsv = '\n'.join(['\t'.join([str(val) for val in row]) for row in tensor]) - request.respond(tsv, 'text/tab-separated-values') + if tensor.dtype != 'float32': + tensor = tensor.astype(dtype='float32', copy=False) + data_bytes = tensor.tobytes() + request.respond(data_bytes, 'application/octet-stream') def _serve_bookmarks(self, request, query_params): run = query_params.get('run') |