Diffstat (limited to 'tensorflow/contrib/metrics/python/metrics/classification.py')
-rw-r--r--  tensorflow/contrib/metrics/python/metrics/classification.py | 121
1 file changed, 121 insertions, 0 deletions
diff --git a/tensorflow/contrib/metrics/python/metrics/classification.py b/tensorflow/contrib/metrics/python/metrics/classification.py
index 26aba1cc51..e553612269 100644
--- a/tensorflow/contrib/metrics/python/metrics/classification.py
+++ b/tensorflow/contrib/metrics/python/metrics/classification.py
@@ -22,6 +22,9 @@ from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics_impl
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import distribute as distribute_lib
# TODO(nsilberman): move into metrics/python/ops/
@@ -62,3 +65,121 @@ def accuracy(predictions, labels, weights=None, name=None):
return math_ops.div(math_ops.reduce_sum(is_correct),
math_ops.reduce_sum(num_values))
return math_ops.reduce_mean(is_correct)
+
+
+def f1_score(labels, predictions, weights=None, num_thresholds=200,
+ metrics_collections=None, updates_collections=None, name=None):
+  """Computes the (approximately) best F1-score across different thresholds.
+
+  The f1_score function applies a range of thresholds to the predictions to
+  convert them from [0, 1] to bool. Precision and recall are computed by
+  comparing the thresholded predictions to the labels. The F1-score is then
+  defined as 2 * precision * recall / (precision + recall). The best F1-score
+  across all thresholds is returned.
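+  For example, if at some threshold precision = 0.8 and recall = 0.5, the
+  F1-score at that threshold is 2 * 0.8 * 0.5 / (0.8 + 0.5) ~= 0.615.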
+
+  Disclaimer: In practice it may be desirable to choose the best threshold on
+  a validation set and evaluate the F1-score with this threshold on a separate
+  test set, or simply to use a fixed threshold (e.g. 0.5).
+
+  This function internally creates three local variables, `true_positives`,
+  `false_positives` and `false_negatives`, that are used to compute pairs of
+  precision and recall values for a linearly spaced set of thresholds from
+  which the best F1-score is derived.
+
+  This value is ultimately returned as `f1_score`: an idempotent operation
+  that computes the F1-score from the aforementioned variables. The
+  `num_thresholds` argument controls the degree of discretization, with larger
+  numbers of thresholds more closely approximating the true best F1-score.
+
+ For estimation of the metric over a stream of data, the function creates an
+ `update_op` operation that updates these variables and returns the F1-score.
+
+ Example usage with a custom estimator:
+ def model_fn(features, labels, mode):
+ predictions = make_predictions(features)
+ loss = make_loss(predictions, labels)
+ train_op = tf.contrib.training.create_train_op(
+ total_loss=loss,
+ optimizer='Adam')
+ eval_metric_ops = {'f1': f1_score(labels, predictions)}
+ return tf.estimator.EstimatorSpec(
+ mode=mode,
+ predictions=predictions,
+ loss=loss,
+ train_op=train_op,
+          eval_metric_ops=eval_metric_ops)
+ estimator = tf.estimator.Estimator(model_fn=model_fn)
+
+ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+ Args:
+ labels: A `Tensor` whose shape matches `predictions`. Will be cast to
+ `bool`.
+ predictions: A floating point `Tensor` of arbitrary shape and whose values
+ are in the range `[0, 1]`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `labels` dimension).
+    num_thresholds: The number of thresholds to use when discretizing the
+      precision-recall curve.
+ metrics_collections: An optional list of collections that `f1_score` should
+ be added to.
+ updates_collections: An optional list of collections that `update_op` should
+ be added to.
+ name: An optional variable_scope name.
+
+ Returns:
+    f1_score: A scalar `Tensor` representing the current best F1-score across
+      different thresholds.
+    update_op: An operation that increments the `true_positives`,
+      `false_positives` and `false_negatives` variables appropriately and
+      whose value matches the `f1_score`.
+
+ Raises:
+ ValueError: If `predictions` and `labels` have mismatched shapes, or if
+ `weights` is not `None` and its shape doesn't match `predictions`, or if
+ either `metrics_collections` or `updates_collections` are not a list or
+ tuple.
+ """
+ with variable_scope.variable_scope(
+ name, 'f1', (labels, predictions, weights)):
+ predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access
+ predictions=predictions, labels=labels, weights=weights)
+ # To account for floating point imprecisions / avoid division by zero.
+ epsilon = 1e-7
+ thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
+ for i in range(num_thresholds - 2)]
+ thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon]
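+    # For example, with num_thresholds=5 the list is
+    # [-1e-07, 0.25, 0.5, 0.75, 1.0000001], so every prediction in [0, 1]
+    # lies strictly between the lowest and highest threshold.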
+
+ # Confusion matrix.
+ values, update_ops = metrics_impl._confusion_matrix_at_thresholds( # pylint: disable=protected-access
+ labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn'))
+
+    # Compute precision, recall and F1 at each threshold, then keep the best.
+ def compute_best_f1_score(tp, fp, fn, name):
+ precision_at_t = math_ops.div(tp, epsilon + tp + fp,
+ name='precision_' + name)
+ recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
+ # Compute F1 score.
+ f1_at_thresholds = (
+ 2.0 * precision_at_t * recall_at_t /
+ (precision_at_t + recall_at_t + epsilon))
+ return math_ops.reduce_max(f1_at_thresholds)
+
+ def f1_across_towers(_, values):
+ best_f1 = compute_best_f1_score(tp=values['tp'], fp=values['fp'],
+ fn=values['fn'], name='value')
+ if metrics_collections:
+ ops.add_to_collections(metrics_collections, best_f1)
+ return best_f1
+
+ best_f1 = distribute_lib.get_tower_context().merge_call(
+ f1_across_towers, values)
+
+ update_op = compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'],
+ fn=update_ops['fn'], name='update')
+ if updates_collections:
+ ops.add_to_collections(updates_collections, update_op)
+
+ return best_f1, update_op
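
Usage note: the returned pair behaves like the other streaming metrics in
tf.metrics, with `update_op` accumulating the per-threshold tp/fp/fn counters
over batches and `f1_score` reading the current best value. A minimal TF 1.x
graph-mode sketch, assuming synthetic placeholder data and importing the
module from the path shown above:

import numpy as np
import tensorflow as tf
from tensorflow.contrib.metrics.python.metrics import classification

labels = tf.placeholder(tf.bool, shape=[None])
predictions = tf.placeholder(tf.float32, shape=[None])
f1, update_op = classification.f1_score(labels, predictions, num_thresholds=10)

with tf.Session() as sess:
  # The tp/fp/fn accumulators created by f1_score are local variables.
  sess.run(tf.local_variables_initializer())
  # Accumulate counts over a few batches of (synthetic) data ...
  for _ in range(3):
    sess.run(update_op, feed_dict={labels: np.random.rand(8) > 0.5,
                                   predictions: np.random.rand(8)})
  # ... then read the best F1-score across the candidate thresholds.
  print(sess.run(f1))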