diff options
author | 2016-11-22 11:25:05 -0800 | |
---|---|---|
committer | 2016-11-22 11:46:08 -0800 | |
commit | 26f423d862bef888aafbbcdedbab103c864dff79 (patch) | |
tree | e135d3b117647871e0660037165fd185b65c26a8 | |
parent | 916db98a61b1b7991e05b36788678eea9569c1c2 (diff) |
Improve PCA speed (sample the number of points, not just dimensions).
Change: 139933186
4 files changed, 37 insertions, 23 deletions
diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts index 4adbb56b80..34f275f546 100644 --- a/tensorflow/tensorboard/components/vz_projector/data.ts +++ b/tensorflow/tensorboard/components/vz_projector/data.ts @@ -95,8 +95,8 @@ const IS_FIREFOX = navigator.userAgent.toLowerCase().indexOf('firefox') >= 0; /** Controls whether nearest neighbors computation is done on the GPU or CPU. */ const KNN_GPU_ENABLED = WEBGL_SUPPORT && !IS_FIREFOX; -/** Sampling is used when computing expensive operations such as T-SNE. */ -export const SAMPLE_SIZE = 10000; +export const TSNE_SAMPLE_SIZE = 10000; +export const PCA_SAMPLE_SIZE = 50000; /** Number of dimensions to sample when doing approximate PCA. */ export const PCA_SAMPLE_DIM = 200; /** Number of pca components to compute. */ @@ -115,7 +115,7 @@ export class DataSet { points: DataPoint[]; traces: DataTrace[]; - sampledDataIndices: number[] = []; + shuffledDataIndices: number[] = []; /** * This keeps a list of all current projections so you can easily test to see @@ -137,8 +137,7 @@ export class DataSet { constructor( points: DataPoint[], spriteAndMetadataInfo?: SpriteAndMetadataInfo) { this.points = points; - this.sampledDataIndices = - shuffle(d3.range(this.points.length)).slice(0, SAMPLE_SIZE); + this.shuffledDataIndices = shuffle(d3.range(this.points.length)); this.traces = this.computeTraces(points); this.dim = [this.points.length, this.points[0].vector.length]; this.spriteAndMetadataInfo = spriteAndMetadataInfo; @@ -270,12 +269,15 @@ export class DataSet { return runAsyncTask('Computing PCA...', () => { // Approximate pca vectors by sampling the dimensions. let dim = this.points[0].vector.length; - let vectors = this.points.map(d => d.vector); + let vectors = this.shuffledDataIndices.map(i => this.points[i].vector); if (dim > PCA_SAMPLE_DIM) { vectors = vector.projectRandom(vectors, PCA_SAMPLE_DIM); } + let sampledVectors = vectors.slice(0, PCA_SAMPLE_SIZE); + let sigma = numeric.div( - numeric.dot(numeric.transpose(vectors), vectors), vectors.length); + numeric.dot(numeric.transpose(sampledVectors), sampledVectors), + sampledVectors.length); let svd = numeric.svd(sigma); let variances: number[] = svd.S; @@ -290,13 +292,13 @@ export class DataSet { let U: number[][] = svd.U; let pcaVectors = vectors.map(vector => { - let newV: number[] = []; - for (let d = 0; d < NUM_PCA_COMPONENTS; d++) { + let newV = new Float32Array(NUM_PCA_COMPONENTS); + for (let newDim = 0; newDim < NUM_PCA_COMPONENTS; newDim++) { let dot = 0; - for (let i = 0; i < vector.length; i++) { - dot += vector[i] * U[i][d]; + for (let oldDim = 0; oldDim < vector.length; oldDim++) { + dot += vector[oldDim] * U[oldDim][newDim]; } - newV.push(dot); + newV[newDim] = dot; } return newV; }); @@ -321,6 +323,7 @@ export class DataSet { this.tSNEShouldStop = false; this.tSNEIteration = 0; + let sampledIndices = this.shuffledDataIndices.slice(0, TSNE_SAMPLE_SIZE); let step = () => { if (this.tSNEShouldStop) { stepCallback(null); @@ -329,7 +332,7 @@ export class DataSet { } this.tsne.step(); let result = this.tsne.getSolution(); - this.sampledDataIndices.forEach((index, i) => { + sampledIndices.forEach((index, i) => { let dataPoint = this.points[index]; dataPoint.projections['tsne-0'] = result[i * tsneDim + 0]; @@ -350,7 +353,7 @@ export class DataSet { // We found the nearest neighbors before and will reuse them. knnComputation = Promise.resolve(this.nearest); } else { - let sampledData = this.sampledDataIndices.map(i => this.points[i]); + let sampledData = sampledIndices.map(i => this.points[i]); this.nearestK = k; knnComputation = KNN_GPU_ENABLED ? knn.findKNNGPUCosine(sampledData, k, (d => d.vector)) : diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts index 20b41a3db8..78accd80d0 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts @@ -233,7 +233,10 @@ export class DataPanel extends DataPanelPolymer { this.dom.select('#checkpoint-file') .html(wordBreakablePath) .attr('title', this.projectorConfig.modelCheckpointPath); - let defaultTensor = names[0]; + // If in demo mode, let the order decide which tensor to load by default. + let defaultTensor = this.projector.servingMode === 'demo' ? + this.projectorConfig.embeddings[0].tensorName : + names[0]; if (this.selectedTensor === defaultTensor) { // Explicitly call the observer. Polymer won't call it if the previous // string matches the current string. diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html index 32ef680292..cddcb2b7d0 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html @@ -216,7 +216,7 @@ limitations under the License. </p> <p>Iteration: <span class="run-tsne-iter">0</span></p> <p id="tsne-sampling" class="notice"> - For fast results, the data will be sampled down to [[getTsneSampleSize()]] points. + For fast results, the data will be sampled down to [[getTsneSampleSizeText()]] points. </p> <p> <iron-icon icon="book" class="book-icon"></iron-icon> @@ -285,7 +285,7 @@ limitations under the License. </p> <div id="total-variance">Total variance</div> <paper-tooltip for="pca-sampling" position="top" animation-delay="0" fit-to-visible-bounds> - For fast results, the data was randomly projected down to [[getPcaSampledDim()]] dimensions. + For fast results, the data was sampled to [[getPcaSampleSizeText()]] points and randomly projected down to [[getPcaSampledDimText()]] dimensions. </paper-tooltip> </div> <!-- Custom Controls --> diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts index 9c172e4707..3b40ec27ce 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -import {DataSet, PCA_SAMPLE_DIM, Projection, ProjectionType, SAMPLE_SIZE, SpriteAndMetadataInfo, State} from './data'; +import {DataSet, PCA_SAMPLE_DIM, PCA_SAMPLE_SIZE, Projection, ProjectionType, SpriteAndMetadataInfo, State, TSNE_SAMPLE_SIZE} from './data'; import * as vector from './vector'; import {Vector} from './vector'; import {Projector} from './vz-projector'; @@ -296,9 +296,13 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { this.clearCentroids(); this.dom.select('#tsne-sampling') - .style('display', dataSet.points.length > SAMPLE_SIZE ? null : 'none'); + .style( + 'display', + dataSet.points.length > TSNE_SAMPLE_SIZE ? null : 'none'); + let wasSampled = + dataSet.dim[0] > PCA_SAMPLE_SIZE || dataSet.dim[1] > PCA_SAMPLE_DIM; this.dom.select('#pca-sampling') - .style('display', dataSet.dim[1] > PCA_SAMPLE_DIM ? null : 'none'); + .style('display', wasSampled ? null : 'none'); this.showTab('pca'); } @@ -538,12 +542,16 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { return {centroid: vector.centroid(r, accessor), numMatches: r.length}; } - getPcaSampledDim() { + getPcaSampledDimText() { return PCA_SAMPLE_DIM.toLocaleString(); } - getTsneSampleSize() { - return SAMPLE_SIZE.toLocaleString(); + getPcaSampleSizeText() { + return PCA_SAMPLE_SIZE.toLocaleString(); + } + + getTsneSampleSizeText() { + return TSNE_SAMPLE_SIZE.toLocaleString(); } } |