// path: tensorflow/contrib/boosted_trees/proto/tree_config.proto
syntax = "proto3";
option cc_enable_arenas = true;

package tensorflow.boosted_trees.trees;

// TreeNode describes a node in a tree.
message TreeNode {
  oneof node {
    Leaf leaf = 1;
    DenseFloatBinarySplit dense_float_binary_split = 2;
    SparseFloatBinarySplitDefaultLeft sparse_float_binary_split_default_left =
        3;
    SparseFloatBinarySplitDefaultRight sparse_float_binary_split_default_right =
        4;
    CategoricalIdBinarySplit categorical_id_binary_split = 5;
    CategoricalIdSetMembershipBinarySplit
        categorical_id_set_membership_binary_split = 6;
  }
  TreeNodeMetadata node_metadata = 777;
}

// TreeNodeMetadata encodes metadata associated with each node in a tree.
message TreeNodeMetadata {
  // The gain associated with this node.
  float gain = 1;

  // The original leaf node before this node was split.
  Leaf original_leaf = 2;
}

// Leaves can either hold dense or sparse information.
message Leaf {
  oneof leaf {
    // See third_party/tensorflow/contrib/decision_trees/
    // proto/generic_tree_model.proto
    // for a description of how vector and sparse_vector might be used.
    Vector vector = 1;
    SparseVector sparse_vector = 2;
  }
}

message Vector {
  repeated float value = 1;
}

message SparseVector {
  repeated int32 index = 1;
  repeated float value = 2;
}
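
// Illustrative example: a 3-dimensional leaf whose only non-zero output is
// 0.5 in dimension 1 can be stored either densely as
//   vector { value: 0.0 value: 0.5 value: 0.0 }
// or, more compactly, as
//   sparse_vector { index: 1 value: 0.5 }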

// Split rule for dense float features.
message DenseFloatBinarySplit {
  // Float feature column and split threshold describing
  // the rule feature <= threshold.
  int32 feature_column = 1;
  // If feature column is multivalent, this holds the index of the dimension
  // for the split. Defaults to 0.
  int32 dimension_id = 5;
  float threshold = 2;

  // Node children indexing into a contiguous
  // vector of nodes starting from the root.
  int32 left_id = 3;
  int32 right_id = 4;
}
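
// Illustrative example (assumes, as is conventional for such splits, that
// examples satisfying the rule are routed to left_id):
//   feature_column: 3  threshold: 0.5  left_id: 1  right_id: 2
// sends an example whose dense feature 3 is <= 0.5 to node 1 and any other
// example to node 2.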

// Split rule for sparse float features defaulting left for missing features.
message SparseFloatBinarySplitDefaultLeft {
  DenseFloatBinarySplit split = 1;
}

// Split rule for sparse float features defaulting right for missing features.
message SparseFloatBinarySplitDefaultRight {
  DenseFloatBinarySplit split = 1;
}

// Split rule for categorical features with a single feature Id.
message CategoricalIdBinarySplit {
  // Categorical feature column and Id describing
  // the rule feature == Id.
  int32 feature_column = 1;
  int64 feature_id = 2;

  // Node children indexing into a contiguous
  // vector of nodes starting from the root.
  int32 left_id = 3;
  int32 right_id = 4;
}
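
// Illustrative example (assumes examples satisfying the rule are routed to
// left_id): with feature_column: 0 and feature_id: 42, an example whose
// categorical feature 0 equals 42 continues at left_id, otherwise at right_id.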

// Split rule for categorical features with a set of feature Ids.
message CategoricalIdSetMembershipBinarySplit {
  // Categorical feature column and Id describing
  // the rule feature ∈ feature_ids.
  int32 feature_column = 1;
  // Sorted list of Ids in the set.
  repeated int64 feature_ids = 2;

  // Node children indexing into a contiguous
  // vector of nodes starting from the root.
  int32 left_id = 3;
  int32 right_id = 4;
}
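
// Illustrative example (assumes set members are routed to left_id): with
// feature_ids: [3, 7, 19], an example whose categorical feature value is 7
// continues at left_id; a value of 8 continues at right_id.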

// DecisionTreeConfig describes a list of connected nodes.
// Node 0 must be the root and can carry any payload, including a leaf
// when the tree represents only the bias.
// Note that each node id is implicitly its index in the list of nodes.
message DecisionTreeConfig {
  repeated TreeNode nodes = 1;
}
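
// Illustrative text-format sketch (assumes the rule-true branch is left_id):
// a depth-1 tree over dense feature 0 with threshold 0.0. Node ids are the
// positions in `nodes`, so the root split at index 0 points at the leaves
// stored at indices 1 and 2.
//
//   nodes {
//     dense_float_binary_split {
//       feature_column: 0 threshold: 0.0 left_id: 1 right_id: 2
//     }
//   }
//   nodes { leaf { vector { value: -0.4 } } }
//   nodes { leaf { vector { value: 0.7 } } }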

message DecisionTreeMetadata {
  // How many times the tree weight was updated (due to reweighting of the
  // final ensemble, dropout, shrinkage, etc.).
  int32 num_tree_weight_updates = 1;

  // Number of layers grown for this tree.
  int32 num_layers_grown = 2;

  // Whether the tree is finalized, i.e. no more layers can be grown.
  bool is_finalized = 3;
}

message GrowingMetadata {
  // Number of trees that we have attempted to build. After pruning, these
  // trees might have been removed.
  int64 num_trees_attempted = 1;
  // Number of layers that we have attempted to build. After pruning, these
  // layers might have been removed.
  int64 num_layers_attempted = 2;

  // Sorted list of column handlers that have been used in at least one split
  // so far.
  repeated int64 used_handler_ids = 3;
}

// DecisionTreeEnsembleConfig describes an ensemble of decision trees.
message DecisionTreeEnsembleConfig {
  repeated DecisionTreeConfig trees = 1;
  repeated float tree_weights = 2;
  repeated DecisionTreeMetadata tree_metadata = 3;

  // Metadata that is used during training.
  GrowingMetadata growing_metadata = 4;
}
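
// Illustrative example (assumes the usual additive boosted-trees combination,
// which this file does not itself specify): with two trees whose leaves emit
// 0.7 and -0.2 for a given example and tree_weights of 1.0 and 0.1, the
// ensemble output for that example would be 1.0 * 0.7 + 0.1 * (-0.2) = 0.68.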