about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Dan Smilkov <smilkov@google.com> 2016-11-22 11:25:05 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-11-22 11:46:08 -0800
commit: 26f423d862bef888aafbbcdedbab103c864dff79 (patch)
tree: e135d3b117647871e0660037165fd185b65c26a8
parent: 916db98a61b1b7991e05b36788678eea9569c1c2 (diff)
Improve PCA speed (sample the number of points, not just dimensions).
Change: 139933186
-rw-r--r-- tensorflow/tensorboard/components/vz_projector/data.ts 31
-rw-r--r-- tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts 5
-rw-r--r-- tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html 4
-rw-r--r-- tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts 20
4 files changed, 37 insertions, 23 deletions
diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts
index 4adbb56b80..34f275f546 100644
--- a/tensorflow/tensorboard/components/vz_projector/data.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data.ts
@@ -95,8 +95,8 @@ const IS_FIREFOX = navigator.userAgent.toLowerCase().indexOf('firefox') >= 0;
/** Controls whether nearest neighbors computation is done on the GPU or CPU. */
const KNN_GPU_ENABLED = WEBGL_SUPPORT && !IS_FIREFOX;
-/** Sampling is used when computing expensive operations such as T-SNE. */
-export const SAMPLE_SIZE = 10000;
+export const TSNE_SAMPLE_SIZE = 10000;
+export const PCA_SAMPLE_SIZE = 50000;
/** Number of dimensions to sample when doing approximate PCA. */
export const PCA_SAMPLE_DIM = 200;
/** Number of pca components to compute. */
@@ -115,7 +115,7 @@ export class DataSet {
points: DataPoint[];
traces: DataTrace[];
- sampledDataIndices: number[] = [];
+ shuffledDataIndices: number[] = [];
/**
* This keeps a list of all current projections so you can easily test to see
@@ -137,8 +137,7 @@ export class DataSet {
constructor(
points: DataPoint[], spriteAndMetadataInfo?: SpriteAndMetadataInfo) {
this.points = points;
- this.sampledDataIndices =
- shuffle(d3.range(this.points.length)).slice(0, SAMPLE_SIZE);
+ this.shuffledDataIndices = shuffle(d3.range(this.points.length));
this.traces = this.computeTraces(points);
this.dim = [this.points.length, this.points[0].vector.length];
this.spriteAndMetadataInfo = spriteAndMetadataInfo;
@@ -270,12 +269,15 @@ export class DataSet {
return runAsyncTask('Computing PCA...', () => {
// Approximate pca vectors by sampling the dimensions.
let dim = this.points[0].vector.length;
- let vectors = this.points.map(d => d.vector);
+ let vectors = this.shuffledDataIndices.map(i => this.points[i].vector);
if (dim > PCA_SAMPLE_DIM) {
vectors = vector.projectRandom(vectors, PCA_SAMPLE_DIM);
}
+ let sampledVectors = vectors.slice(0, PCA_SAMPLE_SIZE);
+
let sigma = numeric.div(
- numeric.dot(numeric.transpose(vectors), vectors), vectors.length);
+ numeric.dot(numeric.transpose(sampledVectors), sampledVectors),
+ sampledVectors.length);
let svd = numeric.svd(sigma);
let variances: number[] = svd.S;
@@ -290,13 +292,13 @@ export class DataSet {
let U: number[][] = svd.U;
let pcaVectors = vectors.map(vector => {
- let newV: number[] = [];
- for (let d = 0; d < NUM_PCA_COMPONENTS; d++) {
+ let newV = new Float32Array(NUM_PCA_COMPONENTS);
+ for (let newDim = 0; newDim < NUM_PCA_COMPONENTS; newDim++) {
let dot = 0;
- for (let i = 0; i < vector.length; i++) {
- dot += vector[i] * U[i][d];
+ for (let oldDim = 0; oldDim < vector.length; oldDim++) {
+ dot += vector[oldDim] * U[oldDim][newDim];
}
- newV.push(dot);
+ newV[newDim] = dot;
}
return newV;
});
@@ -321,6 +323,7 @@ export class DataSet {
this.tSNEShouldStop = false;
this.tSNEIteration = 0;
+ let sampledIndices = this.shuffledDataIndices.slice(0, TSNE_SAMPLE_SIZE);
let step = () => {
if (this.tSNEShouldStop) {
stepCallback(null);
@@ -329,7 +332,7 @@ export class DataSet {
}
this.tsne.step();
let result = this.tsne.getSolution();
- this.sampledDataIndices.forEach((index, i) => {
+ sampledIndices.forEach((index, i) => {
let dataPoint = this.points[index];
dataPoint.projections['tsne-0'] = result[i * tsneDim + 0];
@@ -350,7 +353,7 @@ export class DataSet {
// We found the nearest neighbors before and will reuse them.
knnComputation = Promise.resolve(this.nearest);
} else {
- let sampledData = this.sampledDataIndices.map(i => this.points[i]);
+ let sampledData = sampledIndices.map(i => this.points[i]);
this.nearestK = k;
knnComputation = KNN_GPU_ENABLED ?
knn.findKNNGPUCosine(sampledData, k, (d => d.vector)) :
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
index 20b41a3db8..78accd80d0 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
@@ -233,7 +233,10 @@ export class DataPanel extends DataPanelPolymer {
this.dom.select('#checkpoint-file')
.html(wordBreakablePath)
.attr('title', this.projectorConfig.modelCheckpointPath);
- let defaultTensor = names[0];
+ // If in demo mode, let the order decide which tensor to load by default.
+ let defaultTensor = this.projector.servingMode === 'demo' ?
+ this.projectorConfig.embeddings[0].tensorName :
+ names[0];
if (this.selectedTensor === defaultTensor) {
// Explicitly call the observer. Polymer won't call it if the previous
// string matches the current string.
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
index 32ef680292..cddcb2b7d0 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
@@ -216,7 +216,7 @@ limitations under the License.
</p>
<p>Iteration: <span class="run-tsne-iter">0</span></p>
<p id="tsne-sampling" class="notice">
- For fast results, the data will be sampled down to [[getTsneSampleSize()]] points.
+ For fast results, the data will be sampled down to [[getTsneSampleSizeText()]] points.
</p>
<p>
<iron-icon icon="book" class="book-icon"></iron-icon>
@@ -285,7 +285,7 @@ limitations under the License.
</p>
<div id="total-variance">Total variance</div>
<paper-tooltip for="pca-sampling" position="top" animation-delay="0" fit-to-visible-bounds>
- For fast results, the data was randomly projected down to [[getPcaSampledDim()]] dimensions.
+ For fast results, the data was sampled to [[getPcaSampleSizeText()]] points and randomly projected down to [[getPcaSampledDimText()]] dimensions.
</paper-tooltip>
</div>
<!-- Custom Controls -->
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
index 9c172e4707..3b40ec27ce 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-import {DataSet, PCA_SAMPLE_DIM, Projection, ProjectionType, SAMPLE_SIZE, SpriteAndMetadataInfo, State} from './data';
+import {DataSet, PCA_SAMPLE_DIM, PCA_SAMPLE_SIZE, Projection, ProjectionType, SpriteAndMetadataInfo, State, TSNE_SAMPLE_SIZE} from './data';
import * as vector from './vector';
import {Vector} from './vector';
import {Projector} from './vz-projector';
@@ -296,9 +296,13 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
this.clearCentroids();
this.dom.select('#tsne-sampling')
- .style('display', dataSet.points.length > SAMPLE_SIZE ? null : 'none');
+ .style(
+ 'display',
+ dataSet.points.length > TSNE_SAMPLE_SIZE ? null : 'none');
+ let wasSampled =
+ dataSet.dim[0] > PCA_SAMPLE_SIZE || dataSet.dim[1] > PCA_SAMPLE_DIM;
this.dom.select('#pca-sampling')
- .style('display', dataSet.dim[1] > PCA_SAMPLE_DIM ? null : 'none');
+ .style('display', wasSampled ? null : 'none');
this.showTab('pca');
}
@@ -538,12 +542,16 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
return {centroid: vector.centroid(r, accessor), numMatches: r.length};
}
- getPcaSampledDim() {
+ getPcaSampledDimText() {
return PCA_SAMPLE_DIM.toLocaleString();
}
- getTsneSampleSize() {
- return SAMPLE_SIZE.toLocaleString();
+ getPcaSampleSizeText() {
+ return PCA_SAMPLE_SIZE.toLocaleString();
+ }
+
+ getTsneSampleSizeText() {
+ return TSNE_SAMPLE_SIZE.toLocaleString();
}
}