From b0bbb8fc18c9116b8d2a7c0fa6c0daa48db0027f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 15 Nov 2016 09:33:37 -0800
Subject: Refactor DNNClassifier to use head.

Change: 139210204
---
 .../contrib/learn/python/learn/estimators/dnn.py   | 246 +++++----------------
 .../learn/python/learn/estimators/dnn_test.py      |  59 ++++-
 .../learn/python/learn/estimators/estimator.py     |  10 +-
 3 files changed, 112 insertions(+), 203 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index f8c0a6fe5d..f23adc0c34 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -19,42 +19,27 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import tempfile
-
 from tensorflow.contrib import layers
-from tensorflow.contrib import metrics as metrics_lib
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework import list_variables
-from tensorflow.contrib.framework import load_variable
 from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import evaluable
-from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import monitors as monitor_lib
 from tensorflow.contrib.learn.python.learn import trainable
 from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
 from tensorflow.contrib.learn.python.learn.estimators import estimator
+from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import model_fn
+from tensorflow.contrib.learn.python.learn.estimators import prediction_key
 from tensorflow.contrib.learn.python.learn.utils import export
-from tensorflow.contrib.losses.python.losses import loss_ops
 from tensorflow.python import summary
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.training import training as train
 
-_CENTERED_BIAS = "centered_bias"
 _CENTERED_BIAS_WEIGHT = "centered_bias_weight"
-_CLASSES = "classes"
-_LOGISTIC = "logistic"
-_PROBABILITIES = "probabilities"
 
 # The default learning rate of 0.05 is a historical artifact of the initial
 # implementation, but seems a reasonable choice.
 _LEARNING_RATE = 0.05
@@ -79,83 +64,7 @@ def _add_hidden_layer_summary(value, tag):
   summary.histogram("%s:activation" % tag, value)
 
 
-def _centered_bias(num_label_columns):
-  centered_bias = variables.Variable(
-      array_ops.zeros([num_label_columns]),
-      collections=[_CENTERED_BIAS, ops.GraphKeys.GLOBAL_VARIABLES],
-      name=_CENTERED_BIAS_WEIGHT)
-  for i in range(num_label_columns):
-    summary.scalar("centered_bias %d" % i, centered_bias[i])
-  return centered_bias
-
-
-def _centered_bias_step(labels, loss_fn, num_label_columns):
-  centered_bias = ops.get_collection(_CENTERED_BIAS)
-  batch_size = array_ops.shape(labels)[0]
-  logits = array_ops.reshape(
-      array_ops.tile(centered_bias[0], [batch_size]),
-      [batch_size, num_label_columns])
-  loss = loss_fn(logits, labels)
-  return train.AdagradOptimizer(0.1).minimize(loss, var_list=centered_bias)
-
-
-def _get_weight_tensor(features, weight_column_name):
-  """Returns the weight tensor of shape [batch_size] or 1."""
-  if weight_column_name is None:
-    return 1.0
-  else:
-    return array_ops.reshape(
-        math_ops.to_float(features[weight_column_name]),
-        shape=(-1,))
-
-
-def _reshape_labels(labels):
-  """"Reshapes labels into [batch_size, 1] to be compatible with logits."""
-  check_shape_op = control_flow_ops.Assert(
-      math_ops.less_equal(array_ops.rank(labels), 2),
-      ["labels shape should be either [batch_size, 1] or [batch_size]"])
-  with ops.control_dependencies([check_shape_op]):
-    labels = array_ops.reshape(labels,
-                               shape=[array_ops.shape(labels)[0], 1])
-
-  return labels
-
-
-def _rescale_eval_loss(loss, weights):
-  """Rescales evaluation loss according to the given weights.
-
-  The rescaling is needed because in the training loss weights are not
-  considered in the denominator, whereas for the evaluation loss we should
-  divide by the sum of weights.
-
-  The rescaling factor is:
-    R = sum_{i} 1 / sum_{i} w_{i}
-
-  Args:
-    loss: the scalar weighted loss.
-    weights: weight coefficients. Either a scalar, or a `Tensor` of shape
-      [batch_size].
-
-  Returns:
-    The given loss multiplied by the rescaling factor.
-  """
-  rescaling_factor = math_ops.reduce_mean(weights)
-  return math_ops.div(loss, rescaling_factor)
-
-
-def _predictions(logits, n_classes):
-  """Returns predictions for the given logits and n_classes."""
-  predictions = {}
-  if n_classes == 2:
-    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
-    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
-  predictions[_PROBABILITIES] = nn.softmax(logits)
-  predictions[_CLASSES] = array_ops.reshape(
-      math_ops.argmax(logits, 1), shape=(-1, 1))
-  return predictions
-
-
-def _dnn_classifier_model_fn(features, labels, mode, params):
+def _dnn_model_fn(features, labels, mode, params):
   """Deep Neural Net model_fn.
 
   Args:
@@ -166,23 +75,19 @@ def _dnn_classifier_model_fn(features, labels, mode, params):
       See `ModeKeys`.
     params: A dict of hyperparameters.
       The following hyperparameters are expected:
+      * head: A `_Head` instance.
       * hidden_units: List of hidden units per layer.
       * feature_columns: An iterable containing all the feature columns used
           by the model.
-      * n_classes: number of label classes.
-      * weight_column_name: A string defining the weight feature column, or
-          None if there are no weights.
       * optimizer: string, `Optimizer` object, or callable that defines the
-          optimizer to use for training.
+          optimizer to use for training. If `None`, will use the Adagrad
+          optimizer with a default learning rate of 0.05.
       * activation_fn: Activation function applied to each layer. If `None`,
           will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
-      * enable_centered_bias: A bool. If True, estimator will learn a centered
-          bias variable for each class. Rest of the model structure learns the
-          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
 
   Returns:
@@ -190,24 +95,17 @@ def _dnn_classifier_model_fn(features, labels, mode, params):
     loss: A scalar containing the loss of the step.
     train_op: The op for training.
   """
+  head = params["head"]
   hidden_units = params["hidden_units"]
   feature_columns = params["feature_columns"]
-  n_classes = params["n_classes"]
-  weight_column_name = params["weight_column_name"]
-  optimizer = params["optimizer"]
-  activation_fn = params["activation_fn"]
-  dropout = params["dropout"]
-  gradient_clip_norm = params["gradient_clip_norm"]
-  enable_centered_bias = params["enable_centered_bias"]
-  num_ps_replicas = params["num_ps_replicas"]
+  optimizer = params.get("optimizer") or "Adagrad"
+  activation_fn = params.get("activation_fn")
+  dropout = params.get("dropout")
+  gradient_clip_norm = params.get("gradient_clip_norm")
+  num_ps_replicas = params.get("num_ps_replicas", 0)
 
   features = _get_feature_dict(features)
   parent_scope = "dnn"
-  num_label_columns = 1 if n_classes == 2 else n_classes
-  if n_classes == 2:
-    loss_fn = loss_ops.sigmoid_cross_entropy
-  else:
-    loss_fn = loss_ops.sparse_softmax_cross_entropy
 
   input_layer_partitioner = (
       partitioned_variables.min_max_variable_partitioner(
@@ -249,51 +147,25 @@ def _dnn_classifier_model_fn(features, labels, mode, params):
       partitioner=hidden_layer_partitioner) as scope:
     logits = layers.fully_connected(
         net,
-        num_label_columns,
+        head.logits_dimension,
         activation_fn=None,
         variables_collections=[parent_scope],
         scope=scope)
   _add_hidden_layer_summary(logits, scope.name)
 
-  if enable_centered_bias:
-    logits = nn.bias_add(logits, _centered_bias(num_label_columns))
-
-  if mode == model_fn.ModeKeys.TRAIN:
-    labels = _reshape_labels(labels)
-    weights = _get_weight_tensor(features, weight_column_name)
-    training_loss = loss_fn(logits, labels, weights=weights)
-    loss = _rescale_eval_loss(training_loss, weights)
-
-    train_ops = [optimizers.optimize_loss(
-        loss=training_loss,
+  def _train_op_fn(loss):
+    """Returns the op to optimize the loss."""
+    return optimizers.optimize_loss(
+        loss=loss,
         global_step=contrib_variables.get_global_step(),
         learning_rate=_LEARNING_RATE,
         optimizer=_get_optimizer(optimizer),
         clip_gradients=gradient_clip_norm,
         name=parent_scope,
         # Empty summaries to prevent optimizers from logging the training_loss.
-        summaries=[])]
-    if enable_centered_bias:
-      train_ops.append(_centered_bias_step(labels, loss_fn, num_label_columns))
-
-    summary.scalar("loss", loss)
-
-    return None, loss, control_flow_ops.group(*train_ops)
-
-  elif mode == model_fn.ModeKeys.EVAL:
-    predictions = _predictions(logits=logits, n_classes=n_classes)
+        summaries=[])
 
-    labels = _reshape_labels(labels)
-    weights = _get_weight_tensor(features, weight_column_name)
-    training_loss = loss_fn(logits, labels, weights=weights)
-    loss = _rescale_eval_loss(training_loss, weights)
-
-    return predictions, loss, []
-
-  else:  # mode == estimator.ModeKeys.INFER:
-    predictions = _predictions(logits=logits, n_classes=n_classes)
-
-    return predictions, None, []
+  return head.head_ops(features, labels, mode, _train_op_fn, logits)
 
 
 class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
@@ -408,30 +280,24 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
     """
     self._hidden_units = hidden_units
    self._feature_columns = feature_columns
-    self._model_dir = model_dir or tempfile.mkdtemp()
-    if n_classes <= 1:
-      raise ValueError(
-          "Classification requires n_classes >= 2. Given: {}".format(n_classes))
-    self._n_classes = n_classes
-    self._weight_column_name = weight_column_name
-    optimizer = optimizer or "Adagrad"
-    num_ps_replicas = config.num_ps_replicas if config else 0
+    self._enable_centered_bias = enable_centered_bias
 
     self._estimator = estimator.Estimator(
-        model_fn=_dnn_classifier_model_fn,
-        model_dir=self._model_dir,
+        model_fn=_dnn_model_fn,
+        model_dir=model_dir,
         config=config,
         params={
+            "head": head_lib._multi_class_head(  # pylint: disable=protected-access
+                n_classes,
+                weight_column_name=weight_column_name,
+                enable_centered_bias=enable_centered_bias),
             "hidden_units": hidden_units,
             "feature_columns": feature_columns,
-            "n_classes": n_classes,
-            "weight_column_name": weight_column_name,
            "optimizer": optimizer,
            "activation_fn": activation_fn,
            "dropout": dropout,
            "gradient_clip_norm": gradient_clip_norm,
-            "enable_centered_bias": enable_centered_bias,
-            "num_ps_replicas": num_ps_replicas,
+            "num_ps_replicas": config.num_ps_replicas if config else 0,
        },
        feature_engineering_fn=feature_engineering_fn)
 
@@ -452,19 +318,6 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
   def evaluate(self, x=None, y=None, input_fn=None, feed_fn=None,
                batch_size=None, steps=None, metrics=None, name=None):
     """See evaluable.Evaluable. Note: Labels must be integer class indices."""
-    if metrics is None:
-      metrics = {}
-    metrics.update({
-        "accuracy": metric_spec.MetricSpec(
-            metric_fn=metrics_lib.streaming_accuracy,
-            prediction_key=_CLASSES,
-            weight_key=self._weight_column_name)})
-    if self._n_classes == 2:
-      metrics.update({
-          "auc": metric_spec.MetricSpec(
-              metric_fn=metrics_lib.streaming_auc,
-              prediction_key=_LOGISTIC,
-              weight_key=self._weight_column_name)})
     return self._estimator.evaluate(
         x=x, y=y, input_fn=input_fn, feed_fn=feed_fn, batch_size=batch_size,
         steps=steps, metrics=metrics, name=name)
@@ -489,12 +342,13 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
       as_iterable is True). Each predicted class is represented by its class
       index (i.e. integer from 0 to n_classes-1).
""" + key = prediction_key.PredictionKey.CLASSES preds = self._estimator.predict(x=x, input_fn=input_fn, - batch_size=batch_size, outputs=[_CLASSES], + batch_size=batch_size, outputs=[key], as_iterable=as_iterable) if as_iterable: - return (pred[_CLASSES][0] for pred in preds) - return preds[_CLASSES].reshape(-1) + return (pred[key] for pred in preds) + return preds[key].reshape(-1) @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, @@ -517,13 +371,14 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): probabilities if as_iterable is True). Each predicted class is represented by its class index (i.e. integer from 0 to n_classes-1). """ + key = prediction_key.PredictionKey.PROBABILITIES preds = self._estimator.predict(x=x, input_fn=input_fn, batch_size=batch_size, - outputs=[_PROBABILITIES], + outputs=[key], as_iterable=as_iterable) if as_iterable: - return (pred[_PROBABILITIES] for pred in preds) - return preds[_PROBABILITIES] + return (pred[key] for pred in preds) + return preds[key] def _get_predict_ops(self, features): """See `Estimator` class.""" @@ -537,7 +392,7 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): Returns: List of names. """ - return [name for name, _ in list_variables(self._model_dir)] + return self._estimator.get_variable_names() def get_variable_value(self, name): """Returns value of the variable given by name. @@ -548,7 +403,7 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): Returns: `Tensor` object. """ - return load_variable(self._model_dir, name) + return self._estimator.get_variable_value(name) def export(self, export_dir, @@ -569,13 +424,13 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): use_deprecated_input_fn=use_deprecated_input_fn, signature_fn=( signature_fn or export.classification_signature_fn_with_prob), - prediction_key=_PROBABILITIES, + prediction_key=prediction_key.PredictionKey.PROBABILITIES, default_batch_size=default_batch_size, exports_to_keep=exports_to_keep) @property def model_dir(self): - return self._model_dir + return self._estimator.model_dir @property @deprecated("2016-10-30", @@ -583,10 +438,11 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): "To inspect variables, use get_variable_names() and " "get_variable_value().") def weights_(self): - hiddenlayer_weights = [load_variable( - self._model_dir, name=("dnn/hiddenlayer_%d/weights" % i)) - for i, _ in enumerate(self._hidden_units)] - logits_weights = [load_variable(self._model_dir, name="dnn/logits/weights")] + hiddenlayer_weights = [ + self.get_variable_value("dnn/hiddenlayer_%d/weights" % i) + for i, _ in enumerate(self._hidden_units) + ] + logits_weights = [self.get_variable_value("dnn/logits/weights")] return hiddenlayer_weights + logits_weights @property @@ -595,13 +451,13 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): "To inspect variables, use get_variable_names() and " "get_variable_value().") def bias_(self): - hiddenlayer_bias = [load_variable( - self._model_dir, name=("dnn/hiddenlayer_%d/biases" % i)) - for i, _ in enumerate(self._hidden_units)] - logits_bias = [load_variable(self._model_dir, name="dnn/logits/biases")] - if self._estimator.params["enable_centered_bias"]: - centered_bias = [ - load_variable(self._model_dir, name=_CENTERED_BIAS_WEIGHT)] + hiddenlayer_bias = [ + self.get_variable_value("dnn/hiddenlayer_%d/biases" % i) + for i, _ in enumerate(self._hidden_units) + ] + logits_bias = [self.get_variable_value("dnn/logits/biases")] + 
+    if self._enable_centered_bias:
+      centered_bias = [self.get_variable_value(_CENTERED_BIAS_WEIGHT)]
     else:
       centered_bias = []
     return hiddenlayer_bias + logits_bias + centered_bias
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
index fbcc245018..c808aa0269 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
@@ -578,7 +578,7 @@ class DNNClassifierTest(tf.test.TestCase):
       classifier.export(export_dir)
 
   def testEnableCenteredBias(self):
-    """Tests that we can disable centered bias."""
+    """Tests that we can enable centered bias."""
     cont_features = [
         tf.contrib.layers.real_valued_column('feature', dimension=4)]
 
@@ -589,12 +589,30 @@ class DNNClassifierTest(tf.test.TestCase):
         enable_centered_bias=True,
         config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
-    classifier.fit(input_fn=_iris_input_multiclass_fn, steps=200)
+    classifier.fit(input_fn=_iris_input_multiclass_fn, steps=5)
     self.assertIn('centered_bias_weight', classifier.get_variable_names())
 
     scores = classifier.evaluate(input_fn=_iris_input_multiclass_fn, steps=1)
     self._assertInRange(0.0, 1.0, scores['accuracy'])
     self.assertIn('loss', scores)
 
+  def testDisableCenteredBias(self):
+    """Tests that we can disable centered bias."""
+    cont_features = [
+        tf.contrib.layers.real_valued_column('feature', dimension=4)]
+
+    classifier = tf.contrib.learn.DNNClassifier(
+        n_classes=3,
+        feature_columns=cont_features,
+        hidden_units=[3, 3],
+        enable_centered_bias=False,
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
+
+    classifier.fit(input_fn=_iris_input_multiclass_fn, steps=5)
+    self.assertNotIn('centered_bias_weight', classifier.get_variable_names())
+
+    scores = classifier.evaluate(input_fn=_iris_input_multiclass_fn, steps=1)
+    self._assertInRange(0.0, 1.0, scores['accuracy'])
+    self.assertIn('loss', scores)
+
 
 class DNNRegressorTest(tf.test.TestCase):
 
@@ -962,7 +980,7 @@ class DNNRegressorTest(tf.test.TestCase):
     self.assertIn('loss', scores)
 
   def testEnableCenteredBias(self):
-    """Tests that we can disable centered bias."""
+    """Tests that we can enable centered bias."""
     def _input_fn(num_epochs=None):
       features = {
          'age': tf.train.limit_epochs(
@@ -988,12 +1006,45 @@ class DNNRegressorTest(tf.test.TestCase):
         enable_centered_bias=True,
         config=tf.contrib.learn.RunConfig(tf_random_seed=3))
 
-    regressor.fit(input_fn=_input_fn, steps=200)
+    regressor.fit(input_fn=_input_fn, steps=5)
     self.assertIn('centered_bias_weight', regressor.get_variable_names())
 
     scores = regressor.evaluate(input_fn=_input_fn, steps=1)
     self.assertIn('loss', scores)
 
+  def testDisableCenteredBias(self):
+    """Tests that we can disable centered bias."""
+    def _input_fn(num_epochs=None):
+      features = {
+          'age': tf.train.limit_epochs(
+              tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs),
+          'language': tf.SparseTensor(
+              values=tf.train.limit_epochs(
+                  ['en', 'fr', 'zh'], num_epochs=num_epochs),
+              indices=[[0, 0], [0, 1], [2, 0]],
+              shape=[3, 2])
+      }
+      return features, tf.constant([1., 0., 0.2], dtype=tf.float32)
+
+    sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
+        'language', hash_bucket_size=20)
+    feature_columns = [
+        tf.contrib.layers.embedding_column(sparse_column, dimension=1),
+        tf.contrib.layers.real_valued_column('age')
+    ]
+
+    regressor = tf.contrib.learn.DNNRegressor(
+        feature_columns=feature_columns,
+        hidden_units=[3, 3],
+        enable_centered_bias=False,
+        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+
+    regressor.fit(input_fn=_input_fn, steps=5)
+    self.assertNotIn('centered_bias_weight', regressor.get_variable_names())
+
+    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
+    self.assertIn('loss', scores)
+
 
 def boston_input_fn():
   boston = tf.contrib.learn.datasets.load_boston()
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index b92ed3ccb0..79451c4280 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -242,7 +242,8 @@ def _make_metrics_ops(metrics, features, labels, predictions):
     labels_tensor_or_dict = labels[list(labels.keys())[0]]
 
   result = {}
-  for name, metric in six.iteritems(metrics):
+  # Iterate in lexicographic order, so the graph is identical among runs.
+  for name, metric in sorted(six.iteritems(metrics)):
     if isinstance(metric, metric_spec.MetricSpec):
       result[name] = metric.create_metric_ops(features, labels, predictions)
       continue
@@ -744,7 +745,7 @@ class BaseEstimator(
     """Separate update operations from metric value operations."""
     update_ops = []
     value_ops = {}
-    for name, metric_ops in eval_dict.items():
+    for name, metric_ops in six.iteritems(eval_dict):
      if isinstance(metric_ops, (list, tuple)):
        if len(metric_ops) == 2:
          value_ops[name] = metric_ops[0]
@@ -860,7 +861,8 @@ class BaseEstimator(
       if outputs:
         existing_keys = predictions.keys()
         predictions = {
-            key: value for key, value in predictions.items() if key in outputs
+            key: value
+            for key, value in six.iteritems(predictions) if key in outputs
         }
         if not predictions:
           raise ValueError('Expected to run at least one output from %s, '
@@ -911,7 +913,7 @@ class BaseEstimator(
       if return_dict:
         batch_length = list(output_batch.values())[0].shape[0]
         for i in range(batch_length):
-          yield {key: value[i] for key, value in output_batch.items()}
+          yield {key: value[i] for key, value in six.iteritems(output_batch)}
       else:
         for pred in output_batch['predictions']:
           yield pred
-- 
cgit v1.2.3
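
For reference, a minimal usage sketch of the refactored classifier, modeled on the tests in this patch. It assumes the 2016-era tf.contrib.learn API that the patch targets; the synthetic input function, its values, and the tiny step counts are illustrative assumptions only, not part of the change:

    import tensorflow as tf

    def _input_fn():
      # Three 4-dimensional examples with integer class labels in [0, 3),
      # standing in for the _iris_input_multiclass_fn used by the tests.
      features = {'feature': tf.constant([[0.1, 0.2, 0.3, 0.4],
                                          [0.5, 0.6, 0.7, 0.8],
                                          [0.9, 1.0, 1.1, 1.2]])}
      return features, tf.constant([[0], [1], [2]])

    cont_features = [
        tf.contrib.layers.real_valued_column('feature', dimension=4)]
    classifier = tf.contrib.learn.DNNClassifier(
        n_classes=3,
        feature_columns=cont_features,
        hidden_units=[3, 3],
        # After this refactor, the flag is forwarded to the shared
        # _multi_class_head instead of being handled inside the model_fn.
        enable_centered_bias=True,
        config=tf.contrib.learn.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=5)
    # Variable lookups now delegate to the wrapped Estimator rather than
    # reading checkpoints from a manually tracked model_dir.
    assert 'centered_bias_weight' in classifier.get_variable_names()
    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    print('accuracy: %s, loss: %s' % (scores['accuracy'], scores['loss']))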