# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for TensorForestTrainer.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.contrib.tensor_forest.client import random_forest from tensorflow.contrib.tensor_forest.python import tensor_forest from tensorflow.python.estimator.canned import head as head_lib from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column_lib as core_feature_column from tensorflow.python.framework import ops from tensorflow.python.ops.losses import losses from tensorflow.python.platform import test from tensorflow.python.training import checkpoint_utils def _get_classification_input_fns(): iris = base.load_iris() data = iris.data.astype(np.float32) labels = iris.target.astype(np.int32) train_input_fn = numpy_io.numpy_input_fn( x=data, y=labels, batch_size=150, num_epochs=None, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x=data[:1,], y=None, batch_size=1, num_epochs=1, shuffle=False) return train_input_fn, predict_input_fn def _get_regression_input_fns(): boston = base.load_boston() data = boston.data.astype(np.float32) labels = boston.target.astype(np.int32) train_input_fn = numpy_io.numpy_input_fn( x=data, y=labels, batch_size=506, num_epochs=None, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x=data[:1,], y=None, batch_size=1, num_epochs=1, shuffle=False) return train_input_fn, predict_input_fn class TensorForestTrainerTests(test.TestCase): def testClassification(self): """Tests multi-class classification using matrix data as input.""" hparams = tensor_forest.ForestHParams( num_trees=3, max_nodes=1000, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) classifier = random_forest.TensorForestEstimator(hparams.fill()) input_fn, predict_input_fn = _get_classification_input_fns() classifier.fit(input_fn=input_fn, steps=100) res = classifier.evaluate(input_fn=input_fn, steps=10) self.assertEqual(1.0, res['accuracy']) self.assertAllClose(0.55144483, res['loss']) predictions = list(classifier.predict(input_fn=predict_input_fn)) self.assertAllClose([[0.576117, 0.211942, 0.211942]], [pred['probabilities'] for pred in predictions]) def testRegression(self): """Tests regression using matrix data as input.""" hparams = tensor_forest.ForestHParams( num_trees=5, max_nodes=1000, num_classes=1, num_features=13, regression=True, split_after_samples=20) regressor = random_forest.TensorForestEstimator(hparams.fill()) input_fn, predict_input_fn = _get_regression_input_fns() regressor.fit(input_fn=input_fn, steps=100) res = regressor.evaluate(input_fn=input_fn, steps=10) self.assertGreaterEqual(0.1, res['loss']) predictions = list(regressor.predict(input_fn=predict_input_fn)) self.assertAllClose([24.], [pred['scores'] for pred in predictions], atol=1) def testAdditionalOutputs(self): """Tests multi-class classification using matrix data as input.""" hparams = tensor_forest.ForestHParams( num_trees=1, max_nodes=100, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) classifier = random_forest.TensorForestEstimator( hparams.fill(), keys_column='keys', include_all_in_serving=True) iris = base.load_iris() data = iris.data.astype(np.float32) labels = iris.target.astype(np.int32) input_fn = numpy_io.numpy_input_fn( x={ 'x': data, 'keys': np.arange(len(iris.data)).reshape(150, 1) }, y=labels, batch_size=10, num_epochs=1, shuffle=False) classifier.fit(input_fn=input_fn, steps=100) predictions = list(classifier.predict(input_fn=input_fn)) # Check that there is a key column, tree paths and var. for pred in predictions: self.assertTrue('keys' in pred) self.assertTrue('tree_paths' in pred) self.assertTrue('prediction_variance' in pred) def _assert_checkpoint(self, model_dir, global_step): reader = checkpoint_utils.load_checkpoint(model_dir) self.assertLessEqual( reader.get_tensor(ops.GraphKeys.GLOBAL_STEP), global_step) def testEarlyStopping(self): """Tests multi-class classification using matrix data as input.""" hparams = tensor_forest.ForestHParams( num_trees=100, max_nodes=10000, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) classifier = random_forest.TensorForestEstimator( hparams.fill(), # Set a crazy threshold - 30% loss change. early_stopping_loss_threshold=0.3, early_stopping_rounds=2) input_fn, _ = _get_classification_input_fns() classifier.fit(input_fn=input_fn, steps=100) # We stopped early. self._assert_checkpoint(classifier.model_dir, global_step=5) class CoreTensorForestTests(test.TestCase): def testTrainEvaluateInferDoesNotThrowErrorForClassifier(self): head_fn = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS) hparams = tensor_forest.ForestHParams( num_trees=3, max_nodes=1000, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) est = random_forest.CoreTensorForestEstimator(hparams.fill(), head=head_fn) input_fn, predict_input_fn = _get_classification_input_fns() est.train(input_fn=input_fn, steps=100) res = est.evaluate(input_fn=input_fn, steps=1) self.assertEqual(1.0, res['accuracy']) self.assertAllClose(0.55144483, res['loss']) predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0.576117, 0.211942, 0.211942]], [pred['probabilities'] for pred in predictions]) def testRegression(self): """Tests regression using matrix data as input.""" head_fn = head_lib._regression_head( label_dimension=1, loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS) hparams = tensor_forest.ForestHParams( num_trees=5, max_nodes=1000, num_classes=1, num_features=13, regression=True, split_after_samples=20) regressor = random_forest.CoreTensorForestEstimator( hparams.fill(), head=head_fn) input_fn, predict_input_fn = _get_regression_input_fns() regressor.train(input_fn=input_fn, steps=100) res = regressor.evaluate(input_fn=input_fn, steps=10) self.assertGreaterEqual(0.1, res['loss']) predictions = list(regressor.predict(input_fn=predict_input_fn)) self.assertAllClose( [[24.]], [pred['predictions'] for pred in predictions], atol=1) def testWithFeatureColumns(self): head_fn = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS) hparams = tensor_forest.ForestHParams( num_trees=3, max_nodes=1000, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) est = random_forest.CoreTensorForestEstimator( hparams.fill(), head=head_fn, feature_columns=[core_feature_column.numeric_column('x')]) iris = base.load_iris() data = {'x': iris.data.astype(np.float32)} labels = iris.target.astype(np.int32) input_fn = numpy_io.numpy_input_fn( x=data, y=labels, batch_size=150, num_epochs=None, shuffle=False) est.train(input_fn=input_fn, steps=100) res = est.evaluate(input_fn=input_fn, steps=1) self.assertEqual(1.0, res['accuracy']) self.assertAllClose(0.55144483, res['loss']) def testAutofillsClassificationHead(self): hparams = tensor_forest.ForestHParams( num_trees=3, max_nodes=1000, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) est = random_forest.CoreTensorForestEstimator(hparams.fill()) input_fn, _ = _get_classification_input_fns() est.train(input_fn=input_fn, steps=100) res = est.evaluate(input_fn=input_fn, steps=1) self.assertEqual(1.0, res['accuracy']) self.assertAllClose(0.55144483, res['loss']) def testAutofillsRegressionHead(self): hparams = tensor_forest.ForestHParams( num_trees=5, max_nodes=1000, num_classes=1, num_features=13, regression=True, split_after_samples=20) regressor = random_forest.CoreTensorForestEstimator(hparams.fill()) input_fn, predict_input_fn = _get_regression_input_fns() regressor.train(input_fn=input_fn, steps=100) res = regressor.evaluate(input_fn=input_fn, steps=10) self.assertGreaterEqual(0.1, res['loss']) predictions = list(regressor.predict(input_fn=predict_input_fn)) self.assertAllClose( [[24.]], [pred['predictions'] for pred in predictions], atol=1) def testAdditionalOutputs(self): """Tests multi-class classification using matrix data as input.""" hparams = tensor_forest.ForestHParams( num_trees=1, max_nodes=100, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) classifier = random_forest.CoreTensorForestEstimator( hparams.fill(), keys_column='keys', include_all_in_serving=True) iris = base.load_iris() data = iris.data.astype(np.float32) labels = iris.target.astype(np.int32) input_fn = numpy_io.numpy_input_fn( x={ 'x': data, 'keys': np.arange(len(iris.data)).reshape(150, 1) }, y=labels, batch_size=10, num_epochs=1, shuffle=False) classifier.train(input_fn=input_fn, steps=100) predictions = list(classifier.predict(input_fn=input_fn)) # Check that there is a key column, tree paths and var. for pred in predictions: self.assertTrue('keys' in pred) self.assertTrue('tree_paths' in pred) self.assertTrue('prediction_variance' in pred) def _assert_checkpoint(self, model_dir, global_step): reader = checkpoint_utils.load_checkpoint(model_dir) self.assertLessEqual( reader.get_tensor(ops.GraphKeys.GLOBAL_STEP), global_step) def testEarlyStopping(self): head_fn = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS) hparams = tensor_forest.ForestHParams( num_trees=3, max_nodes=1000, num_classes=3, num_features=4, split_after_samples=20, inference_tree_paths=True) est = random_forest.CoreTensorForestEstimator( hparams.fill(), head=head_fn, # Set a crazy threshold - 30% loss change. early_stopping_loss_threshold=0.3, early_stopping_rounds=2) input_fn, _ = _get_classification_input_fns() est.train(input_fn=input_fn, steps=100) # We stopped early. self._assert_checkpoint(est.model_dir, global_step=8) if __name__ == "__main__": test.main()