aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/boosted_trees
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-08-03 08:15:19 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-08-03 08:18:35 -0700
commit5f13e7017e059b234a1d66e704122ddae2d1a2cb (patch)
treeeb6dcb2f02129c3d2c479445e5e4fd564a523056 /tensorflow/contrib/boosted_trees
parentb2933c618260edc039fb8a7e2dce4d2e185f0892 (diff)
Estimator test for multiclass, core head for multiclass.
PiperOrigin-RevId: 207268708
Diffstat (limited to 'tensorflow/contrib/boosted_trees')
-rw-r--r--tensorflow/contrib/boosted_trees/estimator_batch/BUILD2
-rw-r--r--tensorflow/contrib/boosted_trees/estimator_batch/estimator.py21
-rw-r--r--tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py170
-rw-r--r--tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py3
4 files changed, 195 insertions, 1 deletions
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
index f4a375328e..5fcb19a47a 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
@@ -191,7 +191,7 @@ py_test(
py_test(
name = "estimator_test",
- size = "medium",
+ size = "large",
srcs = ["estimator_test.py"],
srcs_version = "PY2AND3",
tags = [
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py
index 2fa3db1e8d..870ce2442b 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py
@@ -22,8 +22,10 @@ from tensorflow.contrib.boosted_trees.estimator_batch import model
from tensorflow.contrib.boosted_trees.python.utils import losses
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
+from tensorflow.python.estimator.canned import head as core_head_lib
from tensorflow.python.estimator import estimator as core_estimator
from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.losses import losses as core_losses
# ================== Old estimator interface===================================
@@ -403,6 +405,25 @@ class GradientBoostedDecisionTreeRanker(estimator.Estimator):
# The estimators below use new core Estimator interface and must be used with
# new feature columns and heads.
+# For multiclass classification, use the following head, because it uses a
+# loss that is twice differentiable.
+def core_multiclass_head(n_classes):
+ """Core head for multiclass problems."""
+
+ def loss_fn(labels, logits):
+ result = losses.per_example_maxent_loss(
+ labels=labels, logits=logits, weights=None, num_classes=n_classes)
+ return result[0]
+
+ # pylint:disable=protected-access
+ head_fn = core_head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+ n_classes=n_classes,
+ loss_fn=loss_fn,
+ loss_reduction=core_losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)
+ # pylint:enable=protected-access
+
+ return head_fn
+
class CoreGradientBoostedDecisionTreeEstimator(core_estimator.Estimator):
"""An estimator using gradient boosted decision trees.
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
index 83ef87c6fd..68d710d713 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py
@@ -39,6 +39,15 @@ def _train_input_fn():
return features, label
+def _multiclass_train_input_fn():
+ features = {
+ "x": constant_op.constant([[2.], [1.], [1.], [5.], [3.5], [4.6], [3.5]])
+ }
+ label = constant_op.constant(
+ [[1], [0], [0], [2], [2], [0], [1]], dtype=dtypes.int32)
+ return features, label
+
+
def _ranking_train_input_fn():
features = {
"a.f1": constant_op.constant([[3.], [0.3], [1.]]),
@@ -248,6 +257,86 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase):
classifier.fit(input_fn=_train_input_fn, steps=15)
self._assert_checkpoint(classifier.model_dir, global_step=10000000)
+ def testFitAndEvaluateMultiClassTreePerClassDontThrowException(self):
+ learner_config = learner_pb2.LearnerConfig()
+ learner_config.num_classes = 3
+ learner_config.constraints.max_tree_depth = 1
+ learner_config.multi_class_strategy = (
+ learner_pb2.LearnerConfig.TREE_PER_CLASS)
+
+ model_dir = tempfile.mkdtemp()
+ config = run_config.RunConfig()
+
+ classifier = estimator.GradientBoostedDecisionTreeClassifier(
+ learner_config=learner_config,
+ n_classes=learner_config.num_classes,
+ num_trees=1,
+ examples_per_layer=7,
+ model_dir=model_dir,
+ config=config,
+ feature_columns=[contrib_feature_column.real_valued_column("x")])
+
+ classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
+ classifier.evaluate(input_fn=_eval_input_fn, steps=1)
+ classifier.export(self._export_dir_base)
+ result_iter = classifier.predict(input_fn=_eval_input_fn)
+ for prediction_dict in result_iter:
+ self.assertTrue("classes" in prediction_dict)
+
+ def testFitAndEvaluateMultiClassDiagonalDontThrowException(self):
+ learner_config = learner_pb2.LearnerConfig()
+ learner_config.num_classes = 3
+ learner_config.constraints.max_tree_depth = 1
+ learner_config.multi_class_strategy = (
+ learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)
+
+ model_dir = tempfile.mkdtemp()
+ config = run_config.RunConfig()
+
+ classifier = estimator.GradientBoostedDecisionTreeClassifier(
+ learner_config=learner_config,
+ n_classes=learner_config.num_classes,
+ num_trees=1,
+ examples_per_layer=7,
+ model_dir=model_dir,
+ config=config,
+ center_bias=False,
+ feature_columns=[contrib_feature_column.real_valued_column("x")])
+
+ classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
+ classifier.evaluate(input_fn=_eval_input_fn, steps=1)
+ classifier.export(self._export_dir_base)
+ result_iter = classifier.predict(input_fn=_eval_input_fn)
+ for prediction_dict in result_iter:
+ self.assertTrue("classes" in prediction_dict)
+
+ def testFitAndEvaluateMultiClassFullDontThrowException(self):
+ learner_config = learner_pb2.LearnerConfig()
+ learner_config.num_classes = 3
+ learner_config.constraints.max_tree_depth = 1
+ learner_config.multi_class_strategy = (
+ learner_pb2.LearnerConfig.FULL_HESSIAN)
+
+ model_dir = tempfile.mkdtemp()
+ config = run_config.RunConfig()
+
+ classifier = estimator.GradientBoostedDecisionTreeClassifier(
+ learner_config=learner_config,
+ n_classes=learner_config.num_classes,
+ num_trees=1,
+ examples_per_layer=7,
+ model_dir=model_dir,
+ config=config,
+ center_bias=False,
+ feature_columns=[contrib_feature_column.real_valued_column("x")])
+
+ classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
+ classifier.evaluate(input_fn=_eval_input_fn, steps=1)
+ classifier.export(self._export_dir_base)
+ result_iter = classifier.predict(input_fn=_eval_input_fn)
+ for prediction_dict in result_iter:
+ self.assertTrue("classes" in prediction_dict)
+
class CoreGradientBoostedDecisionTreeEstimators(test_util.TensorFlowTestCase):
@@ -303,6 +392,87 @@ class CoreGradientBoostedDecisionTreeEstimators(test_util.TensorFlowTestCase):
est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
est.predict(input_fn=_infer_ranking_train_input_fn)
+ def testFitAndEvaluateMultiClassTreePerClasssDontThrowException(self):
+ n_classes = 3
+ learner_config = learner_pb2.LearnerConfig()
+ learner_config.num_classes = n_classes
+ learner_config.constraints.max_tree_depth = 1
+ learner_config.multi_class_strategy = (
+ learner_pb2.LearnerConfig.TREE_PER_CLASS)
+
+ head_fn = estimator.core_multiclass_head(n_classes=n_classes)
+
+ model_dir = tempfile.mkdtemp()
+ config = run_config.RunConfig()
+
+ classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
+ learner_config=learner_config,
+ head=head_fn,
+ num_trees=1,
+ center_bias=False,
+ examples_per_layer=7,
+ model_dir=model_dir,
+ config=config,
+ feature_columns=[core_feature_column.numeric_column("x")])
+
+ classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
+ classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
+ classifier.predict(input_fn=_eval_input_fn)
+
+ def testFitAndEvaluateMultiClassDiagonalDontThrowException(self):
+ n_classes = 3
+ learner_config = learner_pb2.LearnerConfig()
+ learner_config.num_classes = n_classes
+ learner_config.constraints.max_tree_depth = 1
+ learner_config.multi_class_strategy = (
+ learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)
+
+ head_fn = estimator.core_multiclass_head(n_classes=n_classes)
+
+ model_dir = tempfile.mkdtemp()
+ config = run_config.RunConfig()
+
+ classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
+ learner_config=learner_config,
+ head=head_fn,
+ num_trees=1,
+ center_bias=False,
+ examples_per_layer=7,
+ model_dir=model_dir,
+ config=config,
+ feature_columns=[core_feature_column.numeric_column("x")])
+
+ classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
+ classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
+ classifier.predict(input_fn=_eval_input_fn)
+
+ def testFitAndEvaluateMultiClassFullDontThrowException(self):
+ n_classes = 3
+ learner_config = learner_pb2.LearnerConfig()
+ learner_config.num_classes = n_classes
+ learner_config.constraints.max_tree_depth = 1
+ learner_config.multi_class_strategy = (
+ learner_pb2.LearnerConfig.FULL_HESSIAN)
+
+ head_fn = estimator.core_multiclass_head(n_classes=n_classes)
+
+ model_dir = tempfile.mkdtemp()
+ config = run_config.RunConfig()
+
+ classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
+ learner_config=learner_config,
+ head=head_fn,
+ num_trees=1,
+ center_bias=False,
+ examples_per_layer=7,
+ model_dir=model_dir,
+ config=config,
+ feature_columns=[core_feature_column.numeric_column("x")])
+
+ classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
+ classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
+ classifier.predict(input_fn=_eval_input_fn)
+
if __name__ == "__main__":
googletest.main()
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
index 19e053fcb6..ba5ef700c5 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
@@ -353,6 +353,9 @@ class GradientBoostedDecisionTreeModel(object):
self._gradient_shape = tensor_shape.scalar()
self._hessian_shape = tensor_shape.scalar()
else:
+ if center_bias:
+ raise ValueError("Center bias should be False for multiclass.")
+
self._gradient_shape = tensor_shape.TensorShape([logits_dimension])
if (learner_config.multi_class_strategy ==
learner_pb2.LearnerConfig.FULL_HESSIAN):