author    A. Unique TensorFlower <gardener@tensorflow.org>  2016-11-15 09:33:37 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2016-11-15 09:44:56 -0800
commit    b0bbb8fc18c9116b8d2a7c0fa6c0daa48db0027f (patch)
tree      fb401a812fc8ac4df39cf67f503ded60783b4093
parent    3842d31e89e7c4ceef8920f1f209bdee4d9c190d (diff)
Refactor DNNClassifier to use head.
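
This centralizes loss, prediction, and default-metric construction in a
shared `_Head` object: `_dnn_model_fn` now only builds logits and hands
everything else to `head.head_ops(...)`. The plain-Python sketch below is
illustrative only (the class is a hypothetical stand-in, not the real
`head_lib` API, and the "train"/"infer" mode strings stand in for
`ModeKeys`); it shows just the two members the new model_fn relies on,
`logits_dimension` and `head_ops`:

    import numpy as np

    class MultiClassHeadSketch(object):
        """Hypothetical stand-in for head_lib._multi_class_head."""

        def __init__(self, n_classes):
            self.n_classes = n_classes

        @property
        def logits_dimension(self):
            # Binary classification uses a single logit column, as the
            # removed num_label_columns logic did.
            return 1 if self.n_classes == 2 else self.n_classes

        def head_ops(self, features, labels, mode, train_op_fn, logits):
            """Returns (predictions, loss, train_op) for the given mode."""
            if logits.shape[1] == 1:
                # Pad a zero column so softmax yields [P(0), P(1)], as the
                # removed _predictions helper did.
                logits = np.concatenate(
                    [np.zeros_like(logits), logits], axis=1)
            exp = np.exp(logits - logits.max(axis=1, keepdims=True))
            probabilities = exp / exp.sum(axis=1, keepdims=True)
            predictions = {"probabilities": probabilities,
                           "classes": probabilities.argmax(axis=1)}
            if mode == "infer":
                return predictions, None, None
            # Cross-entropy on integer class labels.
            loss = -np.mean(
                np.log(probabilities[np.arange(len(labels)), labels]))
            train_op = train_op_fn(loss) if mode == "train" else None
            return predictions, loss, train_op

    def dnn_model_fn_sketch(features, labels, mode, params):
        """Mirrors the new _dnn_model_fn: build logits, defer the rest."""
        head = params["head"]
        # A real model builds hidden layers; zero weights keep this short.
        weights = np.zeros((features["x"].shape[1], head.logits_dimension))
        logits = features["x"].dot(weights)
        return head.head_ops(features, labels, mode,
                             lambda loss: "train_op", logits)

    # Usage:
    features = {"x": np.array([[1.0, 2.0], [3.0, 4.0]])}
    labels = np.array([0, 1])
    params = {"head": MultiClassHeadSketch(n_classes=2)}
    predictions, loss, train_op = dnn_model_fn_sketch(
        features, labels, "train", params)

Under this contract, DNNClassifier just passes
head_lib._multi_class_head(n_classes, weight_column_name=...,
enable_centered_bias=...) in params, and evaluate() no longer injects
accuracy/auc metrics by hand, since the head owns the defaults.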
Change: 139210204
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/dnn.py       | 246
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/dnn_test.py  |  59
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/estimator.py |  10
3 files changed, 112 insertions(+), 203 deletions(-)
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index f8c0a6fe5d..f23adc0c34 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -19,42 +19,27 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import tempfile
-
from tensorflow.contrib import layers
-from tensorflow.contrib import metrics as metrics_lib
from tensorflow.contrib.framework import deprecated
from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework import list_variables
-from tensorflow.contrib.framework import load_variable
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.contrib.layers.python.layers import optimizers
from tensorflow.contrib.learn.python.learn import evaluable
-from tensorflow.contrib.learn.python.learn import metric_spec
from tensorflow.contrib.learn.python.learn import monitors as monitor_lib
from tensorflow.contrib.learn.python.learn import trainable
from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
from tensorflow.contrib.learn.python.learn.estimators import estimator
+from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
from tensorflow.contrib.learn.python.learn.estimators import model_fn
+from tensorflow.contrib.learn.python.learn.estimators import prediction_key
from tensorflow.contrib.learn.python.learn.utils import export
-from tensorflow.contrib.losses.python.losses import loss_ops
from tensorflow.python import summary
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.training import training as train
-_CENTERED_BIAS = "centered_bias"
_CENTERED_BIAS_WEIGHT = "centered_bias_weight"
-_CLASSES = "classes"
-_LOGISTIC = "logistic"
-_PROBABILITIES = "probabilities"
# The default learning rate of 0.05 is a historical artifact of the initial
# implementation, but seems a reasonable choice.
@@ -79,83 +64,7 @@ def _add_hidden_layer_summary(value, tag):
summary.histogram("%s:activation" % tag, value)
-def _centered_bias(num_label_columns):
- centered_bias = variables.Variable(
- array_ops.zeros([num_label_columns]),
- collections=[_CENTERED_BIAS, ops.GraphKeys.GLOBAL_VARIABLES],
- name=_CENTERED_BIAS_WEIGHT)
- for i in range(num_label_columns):
- summary.scalar("centered_bias %d" % i, centered_bias[i])
- return centered_bias
-
-
-def _centered_bias_step(labels, loss_fn, num_label_columns):
- centered_bias = ops.get_collection(_CENTERED_BIAS)
- batch_size = array_ops.shape(labels)[0]
- logits = array_ops.reshape(
- array_ops.tile(centered_bias[0], [batch_size]),
- [batch_size, num_label_columns])
- loss = loss_fn(logits, labels)
- return train.AdagradOptimizer(0.1).minimize(loss, var_list=centered_bias)
-
-
-def _get_weight_tensor(features, weight_column_name):
- """Returns the weight tensor of shape [batch_size] or 1."""
- if weight_column_name is None:
- return 1.0
- else:
- return array_ops.reshape(
- math_ops.to_float(features[weight_column_name]),
- shape=(-1,))
-
-
-def _reshape_labels(labels):
- """"Reshapes labels into [batch_size, 1] to be compatible with logits."""
- check_shape_op = control_flow_ops.Assert(
- math_ops.less_equal(array_ops.rank(labels), 2),
- ["labels shape should be either [batch_size, 1] or [batch_size]"])
- with ops.control_dependencies([check_shape_op]):
- labels = array_ops.reshape(labels,
- shape=[array_ops.shape(labels)[0], 1])
-
- return labels
-
-
-def _rescale_eval_loss(loss, weights):
- """Rescales evaluation loss according to the given weights.
-
- The rescaling is needed because in the training loss weights are not
- considered in the denominator, whereas for the evaluation loss we should
- divide by the sum of weights.
-
- The rescaling factor is:
- R = sum_{i} 1 / sum_{i} w_{i}
-
- Args:
- loss: the scalar weighted loss.
- weights: weight coefficients. Either a scalar, or a `Tensor` of shape
- [batch_size].
-
- Returns:
- The given loss multiplied by the rescaling factor.
- """
- rescaling_factor = math_ops.reduce_mean(weights)
- return math_ops.div(loss, rescaling_factor)
-
-
-def _predictions(logits, n_classes):
- """Returns predictions for the given logits and n_classes."""
- predictions = {}
- if n_classes == 2:
- predictions[_LOGISTIC] = math_ops.sigmoid(logits)
- logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
- predictions[_PROBABILITIES] = nn.softmax(logits)
- predictions[_CLASSES] = array_ops.reshape(
- math_ops.argmax(logits, 1), shape=(-1, 1))
- return predictions
-
-
-def _dnn_classifier_model_fn(features, labels, mode, params):
+def _dnn_model_fn(features, labels, mode, params):
"""Deep Neural Net model_fn.
Args:
@@ -166,23 +75,19 @@ def _dnn_classifier_model_fn(features, labels, mode, params):
See `ModeKeys`.
params: A dict of hyperparameters.
The following hyperparameters are expected:
+ * head: A `_Head` instance.
* hidden_units: List of hidden units per layer.
* feature_columns: An iterable containing all the feature columns used by
the model.
- * n_classes: number of label classes.
- * weight_column_name: A string defining the weight feature column, or
- None if there are no weights.
* optimizer: string, `Optimizer` object, or callable that defines the
- optimizer to use for training.
+ optimizer to use for training. If `None`, will use the Adagrad
+ optimizer with a default learning rate of 0.05.
* activation_fn: Activation function applied to each layer. If `None`,
will use `tf.nn.relu`.
* dropout: When not `None`, the probability we will drop out a given
coordinate.
* gradient_clip_norm: A float > 0. If provided, gradients are
clipped to their global norm with this clipping ratio.
- * enable_centered_bias: A bool. If True, estimator will learn a centered
- bias variable for each class. Rest of the model structure learns the
- residual after centered bias.
* num_ps_replicas: The number of parameter server replicas.
Returns:
@@ -190,24 +95,17 @@ def _dnn_classifier_model_fn(features, labels, mode, params):
loss: A scalar containing the loss of the step.
train_op: The op for training.
"""
+ head = params["head"]
hidden_units = params["hidden_units"]
feature_columns = params["feature_columns"]
- n_classes = params["n_classes"]
- weight_column_name = params["weight_column_name"]
- optimizer = params["optimizer"]
- activation_fn = params["activation_fn"]
- dropout = params["dropout"]
- gradient_clip_norm = params["gradient_clip_norm"]
- enable_centered_bias = params["enable_centered_bias"]
- num_ps_replicas = params["num_ps_replicas"]
+ optimizer = params.get("optimizer") or "Adagrad"
+ activation_fn = params.get("activation_fn")
+ dropout = params.get("dropout")
+ gradient_clip_norm = params.get("gradient_clip_norm")
+ num_ps_replicas = params.get("num_ps_replicas", 0)
features = _get_feature_dict(features)
parent_scope = "dnn"
- num_label_columns = 1 if n_classes == 2 else n_classes
- if n_classes == 2:
- loss_fn = loss_ops.sigmoid_cross_entropy
- else:
- loss_fn = loss_ops.sparse_softmax_cross_entropy
input_layer_partitioner = (
partitioned_variables.min_max_variable_partitioner(
@@ -249,51 +147,25 @@ def _dnn_classifier_model_fn(features, labels, mode, params):
partitioner=hidden_layer_partitioner) as scope:
logits = layers.fully_connected(
net,
- num_label_columns,
+ head.logits_dimension,
activation_fn=None,
variables_collections=[parent_scope],
scope=scope)
_add_hidden_layer_summary(logits, scope.name)
- if enable_centered_bias:
- logits = nn.bias_add(logits, _centered_bias(num_label_columns))
-
- if mode == model_fn.ModeKeys.TRAIN:
- labels = _reshape_labels(labels)
- weights = _get_weight_tensor(features, weight_column_name)
- training_loss = loss_fn(logits, labels, weights=weights)
- loss = _rescale_eval_loss(training_loss, weights)
-
- train_ops = [optimizers.optimize_loss(
- loss=training_loss,
+ def _train_op_fn(loss):
+ """Returns the op to optimize the loss."""
+ return optimizers.optimize_loss(
+ loss=loss,
global_step=contrib_variables.get_global_step(),
learning_rate=_LEARNING_RATE,
optimizer=_get_optimizer(optimizer),
clip_gradients=gradient_clip_norm,
name=parent_scope,
# Empty summaries to prevent optimizers from logging the training_loss.
- summaries=[])]
- if enable_centered_bias:
- train_ops.append(_centered_bias_step(labels, loss_fn, num_label_columns))
-
- summary.scalar("loss", loss)
-
- return None, loss, control_flow_ops.group(*train_ops)
-
- elif mode == model_fn.ModeKeys.EVAL:
- predictions = _predictions(logits=logits, n_classes=n_classes)
+ summaries=[])
- labels = _reshape_labels(labels)
- weights = _get_weight_tensor(features, weight_column_name)
- training_loss = loss_fn(logits, labels, weights=weights)
- loss = _rescale_eval_loss(training_loss, weights)
-
- return predictions, loss, []
-
- else: # mode == estimator.ModeKeys.INFER:
- predictions = _predictions(logits=logits, n_classes=n_classes)
-
- return predictions, None, []
+ return head.head_ops(features, labels, mode, _train_op_fn, logits)
class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
@@ -408,30 +280,24 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
"""
self._hidden_units = hidden_units
self._feature_columns = feature_columns
- self._model_dir = model_dir or tempfile.mkdtemp()
- if n_classes <= 1:
- raise ValueError(
- "Classification requires n_classes >= 2. Given: {}".format(n_classes))
- self._n_classes = n_classes
- self._weight_column_name = weight_column_name
- optimizer = optimizer or "Adagrad"
- num_ps_replicas = config.num_ps_replicas if config else 0
+ self._enable_centered_bias = enable_centered_bias
self._estimator = estimator.Estimator(
- model_fn=_dnn_classifier_model_fn,
- model_dir=self._model_dir,
+ model_fn=_dnn_model_fn,
+ model_dir=model_dir,
config=config,
params={
+ "head": head_lib._multi_class_head( # pylint: disable=protected-access
+ n_classes,
+ weight_column_name=weight_column_name,
+ enable_centered_bias=enable_centered_bias),
"hidden_units": hidden_units,
"feature_columns": feature_columns,
- "n_classes": n_classes,
- "weight_column_name": weight_column_name,
"optimizer": optimizer,
"activation_fn": activation_fn,
"dropout": dropout,
"gradient_clip_norm": gradient_clip_norm,
- "enable_centered_bias": enable_centered_bias,
- "num_ps_replicas": num_ps_replicas,
+ "num_ps_replicas": config.num_ps_replicas if config else 0,
},
feature_engineering_fn=feature_engineering_fn)
@@ -452,19 +318,6 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
def evaluate(self, x=None, y=None, input_fn=None, feed_fn=None,
batch_size=None, steps=None, metrics=None, name=None):
"""See evaluable.Evaluable. Note: Labels must be integer class indices."""
- if metrics is None:
- metrics = {}
- metrics.update({
- "accuracy": metric_spec.MetricSpec(
- metric_fn=metrics_lib.streaming_accuracy,
- prediction_key=_CLASSES,
- weight_key=self._weight_column_name)})
- if self._n_classes == 2:
- metrics.update({
- "auc": metric_spec.MetricSpec(
- metric_fn=metrics_lib.streaming_auc,
- prediction_key=_LOGISTIC,
- weight_key=self._weight_column_name)})
return self._estimator.evaluate(
x=x, y=y, input_fn=input_fn, feed_fn=feed_fn, batch_size=batch_size,
steps=steps, metrics=metrics, name=name)
@@ -489,12 +342,13 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
as_iterable is True). Each predicted class is represented by its class
index (i.e. integer from 0 to n_classes-1).
"""
+ key = prediction_key.PredictionKey.CLASSES
preds = self._estimator.predict(x=x, input_fn=input_fn,
- batch_size=batch_size, outputs=[_CLASSES],
+ batch_size=batch_size, outputs=[key],
as_iterable=as_iterable)
if as_iterable:
- return (pred[_CLASSES][0] for pred in preds)
- return preds[_CLASSES].reshape(-1)
+ return (pred[key] for pred in preds)
+ return preds[key].reshape(-1)
@deprecated_arg_values(
estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
@@ -517,13 +371,14 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
probabilities if as_iterable is True). Each predicted class is represented
by its class index (i.e. integer from 0 to n_classes-1).
"""
+ key = prediction_key.PredictionKey.PROBABILITIES
preds = self._estimator.predict(x=x, input_fn=input_fn,
batch_size=batch_size,
- outputs=[_PROBABILITIES],
+ outputs=[key],
as_iterable=as_iterable)
if as_iterable:
- return (pred[_PROBABILITIES] for pred in preds)
- return preds[_PROBABILITIES]
+ return (pred[key] for pred in preds)
+ return preds[key]
def _get_predict_ops(self, features):
"""See `Estimator` class."""
@@ -537,7 +392,7 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
Returns:
List of names.
"""
- return [name for name, _ in list_variables(self._model_dir)]
+ return self._estimator.get_variable_names()
def get_variable_value(self, name):
"""Returns value of the variable given by name.
@@ -548,7 +403,7 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
Returns:
`Tensor` object.
"""
- return load_variable(self._model_dir, name)
+ return self._estimator.get_variable_value(name)
def export(self,
export_dir,
@@ -569,13 +424,13 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
use_deprecated_input_fn=use_deprecated_input_fn,
signature_fn=(
signature_fn or export.classification_signature_fn_with_prob),
- prediction_key=_PROBABILITIES,
+ prediction_key=prediction_key.PredictionKey.PROBABILITIES,
default_batch_size=default_batch_size,
exports_to_keep=exports_to_keep)
@property
def model_dir(self):
- return self._model_dir
+ return self._estimator.model_dir
@property
@deprecated("2016-10-30",
@@ -583,10 +438,11 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
"To inspect variables, use get_variable_names() and "
"get_variable_value().")
def weights_(self):
- hiddenlayer_weights = [load_variable(
- self._model_dir, name=("dnn/hiddenlayer_%d/weights" % i))
- for i, _ in enumerate(self._hidden_units)]
- logits_weights = [load_variable(self._model_dir, name="dnn/logits/weights")]
+ hiddenlayer_weights = [
+ self.get_variable_value("dnn/hiddenlayer_%d/weights" % i)
+ for i, _ in enumerate(self._hidden_units)
+ ]
+ logits_weights = [self.get_variable_value("dnn/logits/weights")]
return hiddenlayer_weights + logits_weights
@property
@@ -595,13 +451,13 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
"To inspect variables, use get_variable_names() and "
"get_variable_value().")
def bias_(self):
- hiddenlayer_bias = [load_variable(
- self._model_dir, name=("dnn/hiddenlayer_%d/biases" % i))
- for i, _ in enumerate(self._hidden_units)]
- logits_bias = [load_variable(self._model_dir, name="dnn/logits/biases")]
- if self._estimator.params["enable_centered_bias"]:
- centered_bias = [
- load_variable(self._model_dir, name=_CENTERED_BIAS_WEIGHT)]
+ hiddenlayer_bias = [
+ self.get_variable_value("dnn/hiddenlayer_%d/biases" % i)
+ for i, _ in enumerate(self._hidden_units)
+ ]
+ logits_bias = [self.get_variable_value("dnn/logits/biases")]
+ if self._enable_centered_bias:
+ centered_bias = [self.get_variable_value(_CENTERED_BIAS_WEIGHT)]
else:
centered_bias = []
return hiddenlayer_bias + logits_bias + centered_bias
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
index fbcc245018..c808aa0269 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
@@ -578,7 +578,7 @@ class DNNClassifierTest(tf.test.TestCase):
classifier.export(export_dir)
def testEnableCenteredBias(self):
- """Tests that we can disable centered bias."""
+ """Tests that we can enable centered bias."""
cont_features = [
tf.contrib.layers.real_valued_column('feature', dimension=4)]
@@ -589,12 +589,30 @@ class DNNClassifierTest(tf.test.TestCase):
enable_centered_bias=True,
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
- classifier.fit(input_fn=_iris_input_multiclass_fn, steps=200)
+ classifier.fit(input_fn=_iris_input_multiclass_fn, steps=5)
self.assertIn('centered_bias_weight', classifier.get_variable_names())
scores = classifier.evaluate(input_fn=_iris_input_multiclass_fn, steps=1)
self._assertInRange(0.0, 1.0, scores['accuracy'])
self.assertIn('loss', scores)
+ def testDisableCenteredBias(self):
+ """Tests that we can disable centered bias."""
+ cont_features = [
+ tf.contrib.layers.real_valued_column('feature', dimension=4)]
+
+ classifier = tf.contrib.learn.DNNClassifier(
+ n_classes=3,
+ feature_columns=cont_features,
+ hidden_units=[3, 3],
+ enable_centered_bias=False,
+ config=tf.contrib.learn.RunConfig(tf_random_seed=1))
+
+ classifier.fit(input_fn=_iris_input_multiclass_fn, steps=5)
+ self.assertNotIn('centered_bias_weight', classifier.get_variable_names())
+ scores = classifier.evaluate(input_fn=_iris_input_multiclass_fn, steps=1)
+ self._assertInRange(0.0, 1.0, scores['accuracy'])
+ self.assertIn('loss', scores)
+
class DNNRegressorTest(tf.test.TestCase):
@@ -962,7 +980,7 @@ class DNNRegressorTest(tf.test.TestCase):
self.assertIn('loss', scores)
def testEnableCenteredBias(self):
- """Tests that we can disable centered bias."""
+ """Tests that we can enable centered bias."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(
@@ -988,12 +1006,45 @@ class DNNRegressorTest(tf.test.TestCase):
enable_centered_bias=True,
config=tf.contrib.learn.RunConfig(tf_random_seed=3))
- regressor.fit(input_fn=_input_fn, steps=200)
+ regressor.fit(input_fn=_input_fn, steps=5)
self.assertIn('centered_bias_weight', regressor.get_variable_names())
scores = regressor.evaluate(input_fn=_input_fn, steps=1)
self.assertIn('loss', scores)
+ def testDisableCenteredBias(self):
+ """Tests that we can disable centered bias."""
+ def _input_fn(num_epochs=None):
+ features = {
+ 'age': tf.train.limit_epochs(
+ tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs),
+ 'language': tf.SparseTensor(
+ values=tf.train.limit_epochs(
+ ['en', 'fr', 'zh'], num_epochs=num_epochs),
+ indices=[[0, 0], [0, 1], [2, 0]],
+ shape=[3, 2])
+ }
+ return features, tf.constant([1., 0., 0.2], dtype=tf.float32)
+
+ sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
+ 'language', hash_bucket_size=20)
+ feature_columns = [
+ tf.contrib.layers.embedding_column(sparse_column, dimension=1),
+ tf.contrib.layers.real_valued_column('age')
+ ]
+
+ regressor = tf.contrib.learn.DNNRegressor(
+ feature_columns=feature_columns,
+ hidden_units=[3, 3],
+ enable_centered_bias=False,
+ config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+
+ regressor.fit(input_fn=_input_fn, steps=5)
+ self.assertNotIn('centered_bias_weight', regressor.get_variable_names())
+
+ scores = regressor.evaluate(input_fn=_input_fn, steps=1)
+ self.assertIn('loss', scores)
+
def boston_input_fn():
boston = tf.contrib.learn.datasets.load_boston()
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index b92ed3ccb0..79451c4280 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -242,7 +242,8 @@ def _make_metrics_ops(metrics, features, labels, predictions):
labels_tensor_or_dict = labels[list(labels.keys())[0]]
result = {}
- for name, metric in six.iteritems(metrics):
+ # Iterate in lexicographic order, so the graph is identical among runs.
+ for name, metric in sorted(six.iteritems(metrics)):
if isinstance(metric, metric_spec.MetricSpec):
result[name] = metric.create_metric_ops(features, labels, predictions)
continue
@@ -744,7 +745,7 @@ class BaseEstimator(
"""Separate update operations from metric value operations."""
update_ops = []
value_ops = {}
- for name, metric_ops in eval_dict.items():
+ for name, metric_ops in six.iteritems(eval_dict):
if isinstance(metric_ops, (list, tuple)):
if len(metric_ops) == 2:
value_ops[name] = metric_ops[0]
@@ -860,7 +861,8 @@ class BaseEstimator(
if outputs:
existing_keys = predictions.keys()
predictions = {
- key: value for key, value in predictions.items() if key in outputs
+ key: value
+ for key, value in six.iteritems(predictions) if key in outputs
}
if not predictions:
raise ValueError('Expected to run at least one output from %s, '
@@ -911,7 +913,7 @@ class BaseEstimator(
if return_dict:
batch_length = list(output_batch.values())[0].shape[0]
for i in range(batch_length):
- yield {key: value[i] for key, value in output_batch.items()}
+ yield {key: value[i] for key, value in six.iteritems(output_batch)}
else:
for pred in output_batch['predictions']:
yield pred