// tensorflow/contrib/tensor_forest/kernels/tree_utils.cc

// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "tensorflow/contrib/tensor_forest/kernels/tree_utils.h"
#include <algorithm>
#include <cfloat>
#include "tensorflow/core/lib/random/philox_random.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {
namespace tensorforest {

using tensorflow::Tensor;

// Returns the column type that `spec` reports for the dense feature with
// index `input_feature`, converted to a DataColumnTypes value.
DataColumnTypes FindDenseFeatureSpec(
    int32 input_feature, const tensorforest::TensorForestDataSpec& spec) {
  const auto raw_type = spec.GetDenseFeatureType(input_feature);
  return static_cast<DataColumnTypes>(raw_type);
}

// Returns the original column type of the sparse column that contains the
// sparse feature index `input_feature`.
//
// Sparse columns are treated as laid out back to back: the running sum of
// spec.sparse(k).size() is advanced until it exceeds `input_feature`.
//
// Edge cases:
//   * If `input_feature` lies beyond the last column, the last column's type
//     is returned (the scan never indexes past spec.sparse_size() - 1).
//   * If `spec` has no sparse columns, an error is logged and kDataFloat is
//     returned.
DataColumnTypes FindSparseFeatureSpec(
    int32 input_feature, const tensorforest::TensorForestDataSpec& spec) {
  const int32 num_columns = spec.sparse_size();
  if (num_columns <= 0) {
    LOG(ERROR) << "FindSparseFeatureSpec called without any sparse columns.";
    return kDataFloat;
  }

  // TODO(thomaswc): Binary search here, especially when we start using more
  // than one sparse column
  int32 size_sum = spec.sparse(0).size();
  int32 column_num = 0;
  // Only step forward while a next column actually exists; the previous
  // bound (column_num < sparse_size()) allowed spec.sparse(sparse_size()) to
  // be read after the increment.
  while (input_feature >= size_sum && column_num + 1 < num_columns) {
    ++column_num;
    size_sum += spec.sparse(column_num).size();
  }

  return static_cast<DataColumnTypes>(spec.sparse(column_num).original_type());
}

// Evaluates score_fn(i) for every i in [0, max) and reports the two lowest
// scores together with the indices that produced them.
//
// Outputs:
//   *best_score / *best_index                 lowest score and its index.
//   *second_best_score / *second_best_index   next lowest score and its index.
//
// A slot that is never filled (fewer than two candidates, or no score
// strictly below FLT_MAX) keeps score FLT_MAX and index -1.  Ties are broken
// in favour of the earlier index because comparisons are strict.
void GetTwoBest(int max, const std::function<float(int)>& score_fn,
                float* best_score, int* best_index, float* second_best_score,
                int* second_best_index) {
  float lowest = FLT_MAX;
  int lowest_at = -1;
  float runner_up = FLT_MAX;
  int runner_up_at = -1;

  for (int candidate = 0; candidate < max; ++candidate) {
    const float current = score_fn(candidate);
    if (current < lowest) {
      // The previous leader is demoted to second place.
      runner_up = lowest;
      runner_up_at = lowest_at;
      lowest = current;
      lowest_at = candidate;
      continue;
    }
    if (current < runner_up) {
      runner_up = current;
      runner_up_at = candidate;
    }
  }

  *best_score = lowest;
  *best_index = lowest_at;
  *second_best_score = runner_up;
  *second_best_index = runner_up_at;
}

// Scores split candidate `i` as the sum of the weighted Gini impurities of
// its slice of `splits` and the matching slice of `rights`.
//
// Both tensors are flat; each candidate occupies num_classes + 1 consecutive
// entries whose first entry is the total, so the per-class counts of
// candidate `i` start one past i * (num_classes + 1) and span num_classes
// entries.
float ClassificationSplitScore(
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& rights, int32 num_classes,
    int i) {
  const Eigen::array<int, 1> first_class = {i * (num_classes + 1) + 1};
  const Eigen::array<int, 1> class_span = {num_classes};

  const auto left_impurity =
      WeightedGiniImpurity(splits.slice(first_class, class_span));
  const auto right_impurity =
      WeightedGiniImpurity(rights.slice(first_class, class_span));
  return left_impurity + right_impurity;
}

void GetTwoBestClassification(const Tensor& total_counts,
                              const Tensor& split_counts, int32 accumulator,
                              float* best_score, int* best_index,
                              float* second_best_score,
                              int* second_best_index) {
  const int32 num_splits = static_cast<int32>(split_counts.shape().dim_size(1));
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  // Ideally, Eigen::Tensor::chip would be best to use here but it results
  // in seg faults, so we have to go with flat views of these tensors.  However,
  // it is still pretty efficient because we put off evaluation until the
  // score is actually returned.
  const auto tc =
      total_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();

  // TODO(gilberth): See if we can delay evaluation here by templating the
  // arguments to ClassificationSplitScore.
  const Eigen::Tensor<float, 1, Eigen::RowMajor> splits =
      split_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  Eigen::array<int, 1> bcast;
  bcast[0] = num_splits;
  const Eigen::Tensor<float, 1, Eigen::RowMajor> rights =
      tc.broadcast(bcast) - splits;

  std::function<float(int)> score_fn =
      std::bind(ClassificationSplitScore, splits, rights, num_classes,
                std::placeholders::_1);

  GetTwoBest(num_splits, score_fn, best_score, best_index, second_best_score,
             second_best_index);
}

// Returns the index of the lowest-scoring classification split candidate for
// `accumulator`, as reported by GetTwoBestClassification.  The runner-up and
// both scores are computed but discarded.
int32 BestFeatureClassification(const Tensor& total_counts,
                                const Tensor& split_counts, int32 accumulator) {
  int winner_index;
  int runner_up_index;
  float winner_score;
  float runner_up_score;
  GetTwoBestClassification(total_counts, split_counts, accumulator,
                           &winner_score, &winner_index, &runner_up_score,
                           &runner_up_index);
  return winner_index;
}

// Scores regression split candidate `i` of `accumulator` as the sum of the
// weighted variances of its left and right sides.
//
// The sample count of the left side is read from slot 0 of the candidate's
// entry in `splits_count_accessor`; the right count is the accumulator total
// (slot 0 of `totals_count_accessor`) minus the left count.  The flat sum and
// square tensors hold num_regression_dims entries per candidate, of which the
// first is skipped and the remaining num_regression_dims - 1 are passed to
// WeightedVariance.  A side with no samples contributes nothing.
float RegressionSplitScore(
    const Eigen::Tensor<float, 3, Eigen::RowMajor>& splits_count_accessor,
    const Eigen::Tensor<float, 2, Eigen::RowMajor>& totals_count_accessor,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits_sum,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits_square,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_sums,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_squares,
    int32 accumulator, int32 num_regression_dims, int i) {
  const float samples_left = splits_count_accessor(accumulator, i, 0);
  const float samples_right =
      totals_count_accessor(accumulator, 0) - samples_left;

  const Eigen::array<int, 1> start = {i * num_regression_dims + 1};
  const Eigen::array<int, 1> span = {num_regression_dims - 1};

  float total_score = 0;

  // Skipping empty sides guards against divide-by-zero.
  if (samples_left > 0) {
    total_score += WeightedVariance(splits_sum.slice(start, span),
                                    splits_square.slice(start, span),
                                    samples_left);
  }
  if (samples_right > 0) {
    total_score += WeightedVariance(right_sums.slice(start, span),
                                    right_squares.slice(start, span),
                                    samples_right);
  }
  return total_score;
}

// Finds the two regression split candidates of `accumulator` with the lowest
// RegressionSplitScore and writes their scores and indices through the output
// pointers, following the GetTwoBest contract (index -1 / score FLT_MAX for a
// slot that is never filled).
//
// The number of candidates is taken from dimension 1 of `split_sums` and the
// per-candidate vector length from dimension 2.
void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares,
                          const Tensor& split_sums, const Tensor& split_squares,
                          int32 accumulator, float* best_score, int* best_index,
                          float* second_best_score, int* second_best_index) {
  const int32 num_splits = static_cast<int32>(split_sums.shape().dim_size(1));
  const int32 num_regression_dims =
      static_cast<int32>(split_sums.shape().dim_size(2));
  // Ideally, Eigen::Tensor::chip would be best to use here but it results
  // in seg faults, so we have to go with flat views of these tensors.  However,
  // it is still pretty efficient because we put off evaluation until the
  // score is actually returned.
  const auto tc_sum =
      total_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  const auto tc_square =
      total_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  const auto splits_sum =
      split_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  const auto splits_square =
      split_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  // Eigen is infuriating to work with, usually resulting in all kinds of
  // unhelpful compiler errors when trying something that seems sane.  This
  // helps us do a simple thing like access the first element (the counts)
  // of these tensors so we can calculate expected value in Variance().
  const auto splits_count_accessor = split_sums.tensor<float, 3>();
  const auto totals_count_accessor = total_sums.tensor<float, 2>();

  // Right-hand statistics: the accumulator totals repeated once per candidate
  // minus that candidate's left-hand statistics.  Declared with `auto`, so
  // these are expression objects rather than evaluated tensors.
  Eigen::array<int, 1> bcast;
  bcast[0] = num_splits;
  const auto right_sums = tc_sum.broadcast(bcast) - splits_sum;
  const auto right_squares = tc_square.broadcast(bcast) - splits_square;

  // NOTE(review): RegressionSplitScore takes concrete Eigen::Tensor
  // references, while the values bound here are tensor views and unevaluated
  // expressions, so each call presumably converts (evaluates/copies) them
  // again -- confirm, and consider materialising them once the way
  // GetTwoBestClassification does.
  GetTwoBest(num_splits,
             std::bind(RegressionSplitScore, splits_count_accessor,
                       totals_count_accessor, splits_sum, splits_square,
                       right_sums, right_squares, accumulator,
                       num_regression_dims, std::placeholders::_1),
             best_score, best_index, second_best_score, second_best_index);
}

// Returns the index of the lowest-scoring regression split candidate for
// `accumulator`, as reported by GetTwoBestRegression.  The runner-up and both
// scores are computed but discarded.
int32 BestFeatureRegression(const Tensor& total_sums,
                            const Tensor& total_squares,
                            const Tensor& split_sums,
                            const Tensor& split_squares, int32 accumulator) {
  int winner_index;
  int runner_up_index;
  float winner_score;
  float runner_up_score;
  GetTwoBestRegression(total_sums, total_squares, split_sums, split_squares,
                       accumulator, &winner_score, &winner_index,
                       &runner_up_score, &runner_up_index);
  return winner_index;
}

// Placeholder for the regression dominance test.  None of the arguments are
// inspected; the function always reports that the best split does not
// dominate.
bool BestSplitDominatesRegression(const Tensor& total_sums,
                                  const Tensor& total_squares,
                                  const Tensor& split_sums,
                                  const Tensor& split_squares,
                                  int32 accumulator) {
  // TODO(thomaswc): Implement this, probably as part of v3.
  static_cast<void>(total_sums);
  static_cast<void>(total_squares);
  static_cast<void>(split_sums);
  static_cast<void>(split_squares);
  static_cast<void>(accumulator);
  constexpr bool kBestSplitDominates = false;
  return kBestSplitDominates;
}

// Draws `n` samples from `ds` into `s` buckets and returns the negated sum of
// the squared bucket counts.
//
// With g the sum of squared counts, the true Gini value is 1 + (-g) / n^2;
// only -g is returned, so the result is never positive.
int BootstrapGini(int n, int s, const random::DistributionSampler& ds,
                  random::SimplePhilox* rand) {
  std::vector<int> histogram(s, 0);
  for (int draw = 0; draw < n; ++draw) {
    const int bucket = ds.Sample(rand);
    ++histogram[bucket];
  }

  int sum_of_squares = 0;
  for (int bucket = 0; bucket < s; ++bucket) {
    const int hits = histogram[bucket];
    sum_of_squares += hits * hits;
  }
  return -sum_of_squares;
}

// Fills *weights with the smoothed per-class frequencies used to initialize a
// DistributionSampler for split candidate `index` of `accumulator`, and
// returns the total number of samples seen by that accumulator.
//
// *weights is resized to 2 * num_classes: entries [0, num_classes) hold the
// left-side classes and entries [num_classes, 2 * num_classes) the right-side
// classes.  Every entry is (count + 1) / (n + num_classes), where n is the
// accumulator total read from slot 0 of `total_counts`.
int MakeBootstrapWeights(const Tensor& total_counts, const Tensor& split_counts,
                         int32 accumulator, int index,
                         std::vector<float>* weights) {
  auto totals = total_counts.tensor<float, 2>();
  auto lefts = split_counts.tensor<float, 3>();

  // Slot 0 of each count vector is the total, hence the - 1.
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  const int samples_seen = totals(accumulator, 0);
  const float smoothing_denom =
      static_cast<float>(samples_seen) + static_cast<float>(num_classes);

  weights->resize(num_classes * 2);
  // Use the Laplace smoothed per-class probabilities when generating the
  // bootstrap samples.
  for (int slot = 1; slot <= num_classes; ++slot) {
    const float left_count = lefts(accumulator, index, slot);
    const float right_count = totals(accumulator, slot) - left_count;
    (*weights)[slot - 1] = (left_count + 1.0) / smoothing_denom;
    (*weights)[num_classes + slot - 1] = (right_count + 1.0) / smoothing_denom;
  }

  return samples_seen;
}

// Bootstrap test for whether the best classification split of `accumulator`
// dominates the second best.
//
// For each of the two best candidates a DistributionSampler is built from the
// smoothed class frequencies (MakeBootstrapWeights) and BootstrapGini is
// evaluated repeatedly.  The best split dominates when the worst (largest)
// bootstrap value of the best candidate is still strictly below the best
// (smallest) bootstrap value of the runner-up.
//
// The number of bootstrap rounds is 1 plus the number of doublings needed to
// bring p = 1 - dominate_fraction up to at least 1.
//
// Args:
//   dominate_fraction: must satisfy 0 <= dominate_fraction < 1, otherwise the
//     process aborts via LOG(FATAL).
//   rand: random source consumed by the samplers.
//
// Returns false when fewer than two candidates could be ranked.
bool BestSplitDominatesClassificationBootstrap(const Tensor& total_counts,
                                               const Tensor& split_counts,
                                               int32 accumulator,
                                               float dominate_fraction,
                                               random::SimplePhilox* rand) {
  float p = 1.0 - dominate_fraction;
  if (p <= 0 || p > 1.0) {
    LOG(FATAL) << "Invalid dominate fraction " << dominate_fraction;
  }

  float best_score;
  float second_best_score;
  int best_feature_index;
  int second_best_index;
  GetTwoBestClassification(total_counts, split_counts, accumulator, &best_score,
                           &best_feature_index, &second_best_score,
                           &second_best_index);

  // GetTwoBest leaves an index at -1 when it could not fill that slot; such
  // an index must not be used to address the count tensors.
  if (best_feature_index < 0 || second_best_index < 0) {
    return false;
  }

  std::vector<float> weights1;
  int n1 = MakeBootstrapWeights(total_counts, split_counts, accumulator,
                                best_feature_index, &weights1);
  random::DistributionSampler ds1(weights1);

  std::vector<float> weights2;
  int n2 = MakeBootstrapWeights(total_counts, split_counts, accumulator,
                                second_best_index, &weights2);
  random::DistributionSampler ds2(weights2);

  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  int bootstrap_samples = 1;
  while (p < 1.0) {
    bootstrap_samples += 1;
    p = p * 2;
  }

  // BootstrapGini never returns a positive value, so the running extrema are
  // seeded from the first draw rather than from a constant: seeding the
  // maximum with 0 would pin it at 0 and make the final comparison always
  // false.  bootstrap_samples is at least 1, so the first draw always exists.
  int worst_g1 = BootstrapGini(n1, 2 * num_classes, ds1, rand);
  for (int i = 1; i < bootstrap_samples; i++) {
    int g1 = BootstrapGini(n1, 2 * num_classes, ds1, rand);
    worst_g1 = std::max(worst_g1, g1);
  }

  int best_g2 = BootstrapGini(n2, 2 * num_classes, ds2, rand);
  for (int i = 1; i < bootstrap_samples; i++) {
    int g2 = BootstrapGini(n2, 2 * num_classes, ds2, rand);
    best_g2 = std::min(best_g2, g2);
  }

  return worst_g1 < best_g2;
}

// Hoeffding-bound test for whether the best classification split of
// `accumulator` dominates the second best.
//
// Returns true when the gap between the second-best and best scores exceeds
//   range * sqrt(log(1 / (1 - dominate_fraction)) / (2 * n)),
// where n is the accumulator's total count (slot 0 of `total_counts`) and
// range = 0.25 * num_classes * n.
bool BestSplitDominatesClassificationHoeffding(const Tensor& total_counts,
                                               const Tensor& split_counts,
                                               int32 accumulator,
                                               float dominate_fraction) {
  float top_score;
  float next_score;
  int top_index;
  int next_index;
  VLOG(1) << "BSDC for accumulator " << accumulator;
  GetTwoBestClassification(total_counts, split_counts, accumulator, &top_score,
                           &top_index, &next_score, &next_index);
  VLOG(1) << "Best score = " << top_score;
  VLOG(1) << "2nd best score = " << next_score;

  const float n = total_counts.Slice(accumulator, accumulator + 1)
                      .unaligned_flat<float>()(0);
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  // Each term in the Gini impurity can range from 0 to 0.5 * 0.5.
  const float range = 0.25 * static_cast<float>(num_classes) * n;

  const double log_term = log(1.0 / (1.0 - dominate_fraction));
  const float hoeffding_bound = range * sqrt(log_term / (2.0 * n));

  VLOG(1) << "num_classes = " << num_classes;
  VLOG(1) << "n = " << n;
  VLOG(1) << "range = " << range;
  VLOG(1) << "hoeffding_bound = " << hoeffding_bound;

  const float score_gap = next_score - top_score;
  return score_gap > hoeffding_bound;
}

// Returns the sum of two trace terms, one for the left side and one for the
// right side of split candidate `index` of `accumulator`.
//
// Each per-class count is incremented by one.  For a side with incremented
// counts c_i, the term is
//   (1 - sum(c_i^2) / (sum(c_i))^2) / (sum(c_i) + 1).
// Right-side counts are the accumulator totals minus the left-side counts.
// Class counts live in slots 1..num_classes; slot 0 holds the total.
double DirichletCovarianceTrace(const Tensor& total_counts,
                                const Tensor& split_counts, int32 accumulator,
                                int index) {
  auto totals = total_counts.tensor<float, 2>();
  auto lefts = split_counts.tensor<float, 3>();
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  double left_sum = 0.0;
  double left_sum_sq = 0.0;
  double right_sum = 0.0;
  double right_sum_sq = 0.0;
  for (int slot = 1; slot <= num_classes; ++slot) {
    const double left = lefts(accumulator, index, slot) + 1.0;
    left_sum += left;
    left_sum_sq += left * left;

    const double right =
        totals(accumulator, slot) - lefts(accumulator, index, slot) + 1.0;
    right_sum += right;
    right_sum_sq += right * right;
  }

  const auto side_trace = [](double sum, double sum_sq) {
    return (1.0 - sum_sq / (sum * sum)) / (sum + 1.0);
  };
  return side_trace(left_sum, left_sum_sq) +
         side_trace(right_sum, right_sum_sq);
}

// Fills *mu with the smoothed class frequencies of split candidate `index` of
// `accumulator`.
//
// *mu is resized to 2 * num_classes: entries [0, num_classes) describe the
// left side and entries [num_classes, 2 * num_classes) the right side.  Each
// entry is (count + 1) / (total + num_classes), where total is slot 0 of the
// accumulator's row in `total_counts`.
//
// Class counts live in slots 1..num_classes of both tensors (slot 0 is the
// total), so the count for class i is read at slot i + 1 on both sides.
//
// NOTE(review): the 2 * num_classes entries sum to
// (total + 2 * num_classes) / (total + num_classes), not 1 -- confirm that
// the denominator is intended.
void getDirichletMean(const Tensor& total_counts, const Tensor& split_counts,
                      int32 accumulator, int index, std::vector<float>* mu) {
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  mu->resize(num_classes * 2);
  auto tc = total_counts.tensor<float, 2>();
  auto lc = split_counts.tensor<float, 3>();

  double total = tc(accumulator, 0);

  for (int i = 0; i < num_classes; i++) {
    double l = lc(accumulator, index, i + 1);
    mu->at(i) = (l + 1.0) / (total + num_classes);

    // The total for class i is at slot i + 1; slot i would pair this class
    // with the overall total (i == 0) or with the previous class.
    double r = tc(accumulator, i + 1) - l;
    mu->at(i + num_classes) = (r + 1.) / (total + num_classes);
  }
}

// Given lambda3, returns the sum of squared differences between (mu1, mu2)
// and the point (x, y) that the multiplier defines:
//   x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
//   y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
// with lambda_1 = -2 lambda_3 / n, lambda_2 = 2 lambda_3 / n and
// n = mu1.size().  No square root is taken.
//
// Returns 0.0 when |lambda3| == 1, where one of the denominators vanishes.
// mu2 is indexed over mu1's range, so it must be at least as long as mu1.
double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1,
                              const std::vector<float>& mu2) {
  if (fabs(lambda3) == 1.0) {
    return 0.0;
  }

  const int n = mu1.size();
  const double lambda1 = -2.0 * lambda3 / n;
  const double lambda2 = 2.0 * lambda3 / n;
  const double x_denominator = 2.0 - 2.0 * lambda3;
  const double y_denominator = 2.0 + 2.0 * lambda3;

  double squared_total = 0.0;
  for (size_t k = 0; k < mu1.size(); ++k) {
    const double dx = (lambda1 + 2.0 * mu1[k]) / x_denominator - mu1[k];
    squared_total += dx * dx;
    const double dy = (lambda2 + 2.0 * mu2[k]) / y_denominator - mu2[k];
    squared_total += dy * dy;
  }
  return squared_total;
}

// Returns the distance measure between (mu1, mu2) and (x, y), where (x, y) is
// the nearest stationary point that lies on the surface defined by
// {x dot 1 = 1, y dot 1 = 1, x dot x - y dot y = 0}.
//
// The value is whatever getDistanceFromLambda3 reports for the better of the
// two roots of the quadratic derived below.  Returns 0.0 when mu1 and mu2
// already have equal squared norms, and 0.0 (after logging a warning) when
// the quadratic has no real root.
//
// NOTE(review): getDistanceFromLambda3 accumulates squared differences
// without taking a square root, so the value returned here is a squared
// distance -- confirm that callers expect that.
double getChebyshevEpsilon(const std::vector<float>& mu1,
                           const std::vector<float>& mu2) {
  // We are trying to minimize d = |mu1 - x|^2 + |mu2 - y|^2 over the surface.
  // Using Lagrange multipliers, we get
  //   partial d / partial x = -2 mu1 + 2 x = lambda_1 1 + 2 lambda_3 x
  //   partial d / partial y = -2 mu2 + 2 y = lambda_2 1 - 2 lambda_3 y
  // or
  //   x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
  //   y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
  // which implies
  //   2 - 2 lambda_3 = lambda_1 1 dot 1 + 2 mu1 dot 1
  //   2 + 2 lambda_3 = lambda_2 1 dot 1 + 2 mu2 dot 1
  //   |lambda_1 1 + 2 mu1|^2 (2 + 2 lambda_3)^2 =
  //     |lambda_2 1 + 2 mu2|^2 (2 - 2 lambda_3)^2
  // So solving for the lambda's and using the fact that
  // mu1 dot 1 = 1 and mu2 dot 1 = 1,
  //   lambda_1 = -2 lambda_3 / (1 dot 1)
  //   lambda_2 = 2 lambda_3 / (1 dot 1)
  // and (letting n = 1 dot 1)
  //   | - lambda_3 1 + n mu1 |^2 (1 + lambda_3)^2 =
  //   | lambda_3 1 + n mu2 |^2 (1 - lambda_3)^2
  // or
  //   (lambda_3^2 n - 2 n lambda_3 + n^2 mu1 dot mu1)(1 + lambda_3)^2 =
  //   (lambda_3^2 n + 2 n lambda_3 + n^2 mu2 dot mu2)(1 - lambda_3)^2
  // Dividing by n and writing L = lambda_3, A = n mu1 dot mu1 and
  // B = n mu2 dot mu2:
  //   (L^2 - 2 L + A)(1 + 2 L + L^2) = (L^2 + 2 L + B)(1 - 2 L + L^2)
  // Expanding both sides,
  //   L^4 + (A - 3) L^2 + (2 A - 2) L + A =
  //   L^4 + (B - 3) L^2 + (2 - 2 B) L + B
  // so the quartic and cubic terms cancel and we are left with
  //   (A - B) L^2 + (2 A + 2 B - 4) L + (A - B) = 0
  // which can be solved using the quadratic formula.
  const int n = mu1.size();
  double len1 = 0.0;
  for (float m : mu1) {
    len1 += m * m;
  }
  double len2 = 0.0;
  for (float m : mu2) {
    len2 += m * m;
  }

  const double a = n * (len1 - len2);
  const double b = 2 * n * (len1 + len2) - 4;
  const double c = a;

  if (a == 0.0) {
    // Equal squared norms: the quadratic collapses to b * lambda_3 = 0.  Its
    // root lambda_3 = 0 gives x = mu1 and y = mu2, i.e. zero distance, and it
    // also avoids dividing by a below.
    return 0.0;
  }

  const double discrim = b * b - 4 * a * c;
  if (discrim < 0.0) {
    LOG(WARNING) << "Negative discriminant " << discrim;
    return 0.0;
  }

  const double sdiscrim = sqrt(discrim);
  // TODO(thomaswc): Analyze whether one of these is always closer.
  const double v1 = (-b + sdiscrim) / (2 * a);
  const double v2 = (-b - sdiscrim) / (2 * a);
  const double dist1 = getDistanceFromLambda3(v1, mu1, mu2);
  const double dist2 = getDistanceFromLambda3(v2, mu1, mu2);
  return std::min(dist1, dist2);
}

// Chebyshev-style test for whether the best classification split of
// `accumulator` dominates the second best.
//
// Computes
//   trace   = DirichletCovarianceTrace(best) + DirichletCovarianceTrace(2nd)
//   epsilon = getChebyshevEpsilon(mean(best), mean(2nd))
// and returns true when 1 - trace / epsilon^2 exceeds `dominate_fraction`.
//
// Returns false when fewer than two candidates could be ranked, or when
// epsilon is exactly zero.
bool BestSplitDominatesClassificationChebyshev(const Tensor& total_counts,
                                               const Tensor& split_counts,
                                               int32 accumulator,
                                               float dominate_fraction) {
  float best_score;
  float second_best_score;
  int best_feature_index;
  int second_best_index;
  VLOG(1) << "BSDC for accumulator " << accumulator;
  GetTwoBestClassification(total_counts, split_counts, accumulator, &best_score,
                           &best_feature_index, &second_best_score,
                           &second_best_index);
  VLOG(1) << "Best score = " << best_score;
  VLOG(1) << "2nd best score = " << second_best_score;

  // GetTwoBest leaves an index at -1 when it could not fill that slot; such
  // an index must not be used to address the count tensors below.
  if (best_feature_index < 0 || second_best_index < 0) {
    return false;
  }

  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;
  const float n = total_counts.Slice(accumulator, accumulator + 1)
                      .unaligned_flat<float>()(0);

  VLOG(1) << "num_classes = " << num_classes;
  VLOG(1) << "n = " << n;
  double trace = DirichletCovarianceTrace(total_counts, split_counts,
                                          accumulator, best_feature_index) +
                 DirichletCovarianceTrace(total_counts, split_counts,
                                          accumulator, second_best_index);

  std::vector<float> mu1;
  getDirichletMean(total_counts, split_counts, accumulator, best_feature_index,
                   &mu1);
  std::vector<float> mu2;
  getDirichletMean(total_counts, split_counts, accumulator, second_best_index,
                   &mu2);
  double epsilon = getChebyshevEpsilon(mu1, mu2);

  // A zero epsilon would divide by zero below.
  if (epsilon == 0.0) {
    return false;
  }

  double dirichlet_bound = 1.0 - trace / (epsilon * epsilon);
  return dirichlet_bound > dominate_fraction;
}

// Builds a callable that returns the dense feature value at
// (example i, column feature).
//
// When `dense` is two-dimensional the callable reads from its float matrix
// view.  Otherwise the callable logs an error on every call and yields 0.
GetFeatureFnType GetDenseFunctor(const Tensor& dense) {
  const bool is_matrix = dense.shape().dims() == 2;
  if (!is_matrix) {
    return [](int32 i, int32 feature) {
      LOG(ERROR) << "trying to access nonexistent dense features.";
      return 0;
    };
  }

  const auto dense_features = dense.matrix<float>();
  // Here we capture by value, which shouldn't incur a copy of the data
  // because of the underlying use of Eigen::TensorMap.
  return [dense_features](int32 i, int32 feature) {
    return dense_features(i, feature);
  };
}

// Builds a callable that returns the sparse feature value at
// (example i, column feature).
//
// When `sparse_indices` is two-dimensional the callable delegates to
// FindSparseValue over the int64 index matrix and the float value vector.
// Otherwise the callable logs an error on every call and yields 0.
GetFeatureFnType GetSparseFunctor(const Tensor& sparse_indices,
                                  const Tensor& sparse_values) {
  const bool has_index_matrix = sparse_indices.shape().dims() == 2;
  if (!has_index_matrix) {
    return [](int32 i, int32 feature) {
      LOG(ERROR) << "trying to access nonexistent sparse features.";
      return 0;
    };
  }

  const auto indices = sparse_indices.matrix<int64>();
  const auto values = sparse_values.vec<float>();
  // Here we capture by value, which shouldn't incur a copy of the data
  // because of the underlying use of Eigen::TensorMap.
  return [indices, values](int32 i, int32 feature) {
    return tensorforest::FindSparseValue(indices, values, i, feature);
  };
}

// Evaluates the decision for example `i` on `feature` against `bias`.
//
// Feature indices below spec.dense_features_size() refer to dense features;
// any other index is shifted down by that size and treated as a sparse
// feature.  The value is fetched through the matching accessor and handed to
// Decide together with the column type found for that feature.
bool DecideNode(const GetFeatureFnType& get_dense,
                const GetFeatureFnType& get_sparse, int32 i, int32 feature,
                float bias, const tensorforest::TensorForestDataSpec& spec) {
  const bool is_sparse = !(feature < spec.dense_features_size());
  if (is_sparse) {
    const int32 sparse_feature = feature - spec.dense_features_size();
    return Decide(get_sparse(i, sparse_feature), bias,
                  FindSparseFeatureSpec(sparse_feature, spec));
  }
  return Decide(get_dense(i, feature), bias,
                FindDenseFeatureSpec(feature, spec));
}

// Compares `value` with `bias` according to the column type.
//
//   kDataFloat:        true when value >= bias.
//   kDataCategorical:  true when value != bias.
//   anything else:     logs an error and returns false.
bool Decide(float value, float bias, DataColumnTypes type) {
  if (type == kDataFloat) {
    return value >= bias;
  }
  if (type == kDataCategorical) {
    // We arbitrarily define categorical equality as going left.
    return value != bias;
  }
  LOG(ERROR) << "Got unknown column type: " << type;
  return false;
}

// Writes the per-output mean of a leaf into (*mean)[0 .. num_outputs),
// borrowing weight from the parent when the leaf has too few samples.
//
// When leaf_sum is below valid_leaf_threshold and parent_sum is non-negative,
// the parent's values are blended in with weight
//   min(1, (valid_leaf_threshold - leaf_sum) / parent_sum)
// and leaf_sum grows by that weight times parent_sum.  Each output is then
// the (possibly blended) value divided by the resulting sum.
//
// *mean must already hold at least num_outputs elements; parent_data is only
// read when the parent weight is positive.
void GetParentWeightedMean(float leaf_sum, const float* leaf_data,
                           float parent_sum, const float* parent_data,
                           float valid_leaf_threshold, int num_outputs,
                           std::vector<float>* mean) {
  const bool borrow_from_parent =
      leaf_sum < valid_leaf_threshold && parent_sum >= 0;

  float parent_weight = 0.0;
  if (borrow_from_parent) {
    VLOG(1) << "not enough samples at leaf, including parent counts."
            << "child sum = " << leaf_sum;
    // Weight the parent's counts just enough so that the new sum is
    // valid_leaf_threshold_, but never give any counts a weight of
    // more than 1.
    const float needed_fraction =
        (valid_leaf_threshold - leaf_sum) / parent_sum;
    parent_weight = std::min(1.0f, needed_fraction);
    leaf_sum += parent_weight * parent_sum;
    VLOG(1) << "Sum w/ parent included = " << leaf_sum;
  }

  const bool blend_parent = parent_weight > 0.0;
  for (int out = 0; out < num_outputs; ++out) {
    float blended = leaf_data[out];
    if (blend_parent) {
      blended += parent_weight * parent_data[out];
    }
    (*mean)[out] = blended / leaf_sum;
  }
}

}  // namespace tensorforest
}  // namespace tensorflow