diff options
Diffstat (limited to 'tensorflow/python/estimator/canned/boosted_trees_utils_test.py')
-rw-r--r-- | tensorflow/python/estimator/canned/boosted_trees_utils_test.py | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/tensorflow/python/estimator/canned/boosted_trees_utils_test.py b/tensorflow/python/estimator/canned/boosted_trees_utils_test.py new file mode 100644 index 0000000000..506d4ea6fb --- /dev/null +++ b/tensorflow/python/estimator/canned/boosted_trees_utils_test.py @@ -0,0 +1,187 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests boosted_trees estimators and model_fn.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.estimator.canned import boosted_trees_utils +from tensorflow.python.framework import test_util +from tensorflow.python.platform import googletest + + +class BoostedTreesDFCTest(test_util.TensorFlowTestCase): + """Test directional feature contributions (DFC) helper functions. """ + + def testDirectionalFeatureContributionsCompute(self): + """Tests logic to compute DFCs given feature ids and logits paths.""" + num_bucketized_features = 3 # Includes one unused feature. + examples_feature_ids = ((2, 2, 0, 0), (2, 2, 0)) + e1_feature_ids, e2_feature_ids = examples_feature_ids + + # DFCs are computed by traversing the prediction path and subtracting each + # child prediction from its parent prediction and associating the change in + # prediction with the respective feature id used for the split. + # For each activation function, f, (currently identity or sigmoid), DFCs are + # calculated for the two examples as: + # example 1: + # feature_0 = (f(1.114) - f(1.214)) + (f(6.114) - f(1.114)) + # feature_1 = 0 # Feature not in ensemble, thus zero contrib. + # feature_2 = (f(0.114) - bias_pred) + (f(1.214) - f(0.114)) + # example 2: + # feature_0 = f(-5.486) - f(1.514) + # feature_1 = 0 # Feature not in ensemble, thus zero contrib. + # feature_2 = (f(0.114) - bias_pred) + (f(1.514) - f(0.114)) + # where bias_pred is = f(0) or f(0.21), with center_bias = {True, False}, + # respectively. + # Keys are center_bias. + expected_dfcs_identity = { + False: ({ + 0: 4.9, + 1: 0, + 2: 1.214 + }, { + 0: -7.0, + 1: 0, + 2: 1.514 + }), + True: ({ + 0: 4.9, + 1: 0, + 2: 1.0039999999999998 + }, { + 0: -7.0, + 1: 0, + 2: 1.3039999999999998 + }) + } + expected_dfcs_sigmoid = { + False: ({ + 0: 0.22678725678805578, + 1: 0, + 2: 0.2710059376234506 + }, { + 0: -0.81552596670046507, + 1: 0, + 2: 0.319653250251275 + }), + True: ({ + 0: 0.22678725678805578, + 1: 0, + 2: 0.2186980280491253 + }, { + 0: -0.81552596670046507, + 1: 0, + 2: 0.26734534067694971 + }) + } + # pylint: disable=protected-access + for f, expected_dfcs in zip( + (boosted_trees_utils._identity, boosted_trees_utils._sigmoid), + (expected_dfcs_identity, expected_dfcs_sigmoid)): + for center_bias in [False, True]: + # If not center_bias, the bias after activation is 0. + if center_bias: + bias_logit = 0.21 # Root node of tree_0. + else: + bias_logit = 0 # 0 is default value when there is no original_leaf. + f_bias = f(bias_logit) + + # Logits before and after, as is outputed from + # boosted_trees_ops.example_debug_outputs + examples_logits_paths = ((bias_logit, 0.114, 1.214, 1.114, 6.114), + (bias_logit, 0.114, 1.514, -5.486)) + e1_logits_path, e2_logits_path = examples_logits_paths + e1_expected_dfcs, e2_expected_dfcs = expected_dfcs[center_bias] + # Check feature contributions are correct for both examples. + # Example 1. + # pylint:disable=line-too-long + e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions( + e1_feature_ids, e1_logits_path, f, num_bucketized_features) + self.assertAllClose(e1_bias, f_bias) + self.assertAllClose(e1_dfc, e1_expected_dfcs) + # Example 2. + e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions( + e2_feature_ids, e2_logits_path, f, num_bucketized_features) + # pylint:enable=line-too-long + self.assertAllClose(e2_bias, f_bias) + self.assertAllClose(e2_dfc, e2_expected_dfcs) + # Check if contributions sum to final prediction. + # For each tree, get leaf of last tree. + expected_logits = (e1_logits_path[-1], e2_logits_path[-1]) + # Predictions should be the sum of contributions + bias. + expected_preds = [f(logit) for logit in expected_logits] + e1_pred = e1_bias + sum(e1_dfc.values()) + e2_pred = e2_bias + sum(e2_dfc.values()) + preds = [e1_pred, e2_pred] + self.assertAllClose(preds, expected_preds) + # pylint: enable=protected-access + + def testDFCComputeComparedToExternalExample(self): + """Tests `compute_dfc` compared to external example (regression). + + Example from http://blog.datadive.net/interpreting-random-forests. + """ + # DIS:3, RM: 2, LSTAT:1, NOX:0 + num_bucketized_features = 4 + e1_feature_ids = (2, 1, 0) + e2_feature_ids = (2, 2, 2) + e3_feature_ids = (2, 2, 0) + + bias_logit = 22.60 # Root node of tree_0. + activation = boosted_trees_utils._identity + f_bias = activation(bias_logit) + # Logits before and after, as is outputed from + # boosted_trees_ops.example_debug_outputs + e1_logits_path = (bias_logit, 19.96, 14.91, 18.11) + e2_logits_path = (bias_logit, 37.42, 45.10, 45.90) + e3_logits_path = (bias_logit, 37.42, 32.30, 33.58) + e1_expected_dfcs = {0: 3.20, 1: -5.05, 2: -2.64, 3: 0} + e2_expected_dfcs = {0: 0, 1: 0, 2: 23.3, 3: 0} + e3_expected_dfcs = {0: 1.28, 1: 0, 2: 9.7, 3: 0} + # Check feature contributions are correct for both examples. + # Example 1. + # pylint: disable=protected-access + # pylint: disable=line-too-long + e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions( + e1_feature_ids, e1_logits_path, activation, num_bucketized_features) + self.assertAllClose(e1_bias, f_bias) + self.assertAllClose(e1_dfc, e1_expected_dfcs) + # Example 2. + e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions( + e2_feature_ids, e2_logits_path, activation, num_bucketized_features) + self.assertAllClose(e2_bias, f_bias) + self.assertAllClose(e2_dfc, e2_expected_dfcs) + # Example 3. + e3_bias, e3_dfc = boosted_trees_utils._compute_directional_feature_contributions( + e3_feature_ids, e3_logits_path, activation, num_bucketized_features) + # pylint: enable=line-too-long + self.assertAllClose(e3_bias, f_bias) + self.assertAllClose(e3_dfc, e3_expected_dfcs) + # pylint: enable=protected-access + # Check if contributions sum to final prediction. + # For each tree, get leaf of last tree. + expected_logits = (18.11, 45.90, 33.58) + # Predictions should be the sum of contributions + bias. + expected_preds = [activation(logit) for logit in expected_logits] + e1_pred = e1_bias + sum(e1_dfc.values()) + e2_pred = e2_bias + sum(e2_dfc.values()) + e3_pred = e3_bias + sum(e3_dfc.values()) + preds = [e1_pred, e2_pred, e3_pred] + self.assertAllClose(preds, expected_preds) + + +if __name__ == '__main__': + googletest.main() |