author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-10-12 12:41:40 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>   2017-10-12 12:45:35 -0700
commit    deb72df13e588eda07968adc306c1a87416cf7fc (patch)
tree      b60b8cc80aee43e2be5b89d50858148f70c96618 /tensorflow/contrib/losses
parent    453515d166a7d3a42e7075f2267ba85a9ff25b96 (diff)
This creates a library under contrib for implementations of common metric
learning losses, starting with:

* [Contrastive loss](http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf)
* [Triplet loss with semihard negative mining](https://arxiv.org/abs/1503.03832)
* [Npairs loss](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)
* Npairs loss w/ multilabel support
* [Lifted structured loss](http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Song_Deep_Metric_Learning_CVPR_2016_paper.pdf)
* [Structured clustering embedding](https://arxiv.org/abs/1612.01213)

PiperOrigin-RevId: 171997156
Diffstat (limited to 'tensorflow/contrib/losses')
-rw-r--r--  tensorflow/contrib/losses/BUILD                                          |   44
-rw-r--r--  tensorflow/contrib/losses/__init__.py                                    |    3
-rw-r--r--  tensorflow/contrib/losses/python/metric_learning/__init__.py             |   39
-rw-r--r--  tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py      | 1031
-rw-r--r--  tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py |  562
5 files changed, 1678 insertions(+), 1 deletion(-)
diff --git a/tensorflow/contrib/losses/BUILD b/tensorflow/contrib/losses/BUILD
index f75b0aa1b3..33fbbe12d3 100644
--- a/tensorflow/contrib/losses/BUILD
+++ b/tensorflow/contrib/losses/BUILD
@@ -15,6 +15,7 @@ py_library(
"__init__.py",
"python/losses/__init__.py",
"python/losses/loss_ops.py",
+ "python/metric_learning/metric_loss_ops.py",
],
srcs_version = "PY2AND3",
deps = [
@@ -50,6 +51,49 @@ py_test(
],
)
+py_library(
+ name = "metric_learning_py",
+ srcs = [
+ "python/metric_learning/__init__.py",
+ "python/metric_learning/metric_loss_ops.py",
+ ],
+ srcs_version = "PY2AND3",
+ deps = [
+ "//tensorflow/contrib/framework:framework_py",
+ "//tensorflow/python:array_ops",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ "//tensorflow/python:math_ops",
+ "//tensorflow/python:nn",
+ "//tensorflow/python:nn_ops",
+ "//tensorflow/python:script_ops",
+ "//tensorflow/python:util",
+ ],
+)
+
+py_test(
+ name = "metric_loss_ops_test",
+ srcs = [
+ "python/metric_learning/metric_loss_ops_test.py",
+ ],
+ srcs_version = "PY2AND3",
+ deps = [
+ ":metric_learning_py",
+ "//tensorflow/contrib/framework:framework_py",
+ "//tensorflow/python:array_ops",
+ "//tensorflow/python:client_testlib",
+ "//tensorflow/python:errors",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ "//tensorflow/python:init_ops",
+ "//tensorflow/python:math_ops",
+ "//tensorflow/python:random_ops",
+ "//tensorflow/python:random_seed",
+ "//tensorflow/python:training",
+ "//tensorflow/python:variable_scope",
+ "//tensorflow/python:variables",
+ "//third_party/py/numpy",
+ ],
+)
+
filegroup(
name = "all_files",
srcs = glob(
diff --git a/tensorflow/contrib/losses/__init__.py b/tensorflow/contrib/losses/__init__.py
index 790bf61367..db58647d48 100644
--- a/tensorflow/contrib/losses/__init__.py
+++ b/tensorflow/contrib/losses/__init__.py
@@ -22,10 +22,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from tensorflow.contrib.losses.python import metric_learning
# pylint: disable=wildcard-import
from tensorflow.contrib.losses.python.losses import *
# pylint: enable=wildcard-import
-
from tensorflow.python.util.all_util import remove_undocumented
_allowed_symbols = [
@@ -43,5 +43,6 @@ _allowed_symbols = [
'sigmoid_cross_entropy',
'softmax_cross_entropy',
'sparse_softmax_cross_entropy',
+ 'metric_learning'
]
remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/losses/python/metric_learning/__init__.py b/tensorflow/contrib/losses/python/metric_learning/__init__.py
new file mode 100644
index 0000000000..4e551d6aca
--- /dev/null
+++ b/tensorflow/contrib/losses/python/metric_learning/__init__.py
@@ -0,0 +1,39 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Ops for building neural network losses.
+
+See @{$python/contrib.losses}.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=wildcard-import
+from tensorflow.contrib.losses.python.metric_learning.metric_loss_ops import *
+# pylint: enable=wildcard-import
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = [
+ 'contrastive_loss',
+ 'cluster_loss',
+ 'lifted_struct_loss',
+ 'npairs_loss',
+ 'npairs_loss_multilabel',
+ 'triplet_semihard_loss',
+]
+remove_undocumented(__name__, _allowed_symbols)
+
+
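With this `__init__.py` in place, the losses become reachable through contrib. A minimal usage sketch (assuming a TF 1.x build that includes this commit; shapes and values are illustrative):

```python
import tensorflow as tf

# Four embeddings, two classes; triplet_semihard_loss expects l2-normalized
# embeddings and integer class labels.
labels = tf.constant([0, 0, 1, 1], dtype=tf.int32)
embeddings = tf.nn.l2_normalize(tf.random_normal([4, 16]), dim=1)

loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
    labels=labels, embeddings=embeddings, margin=1.0)

with tf.Session() as sess:
  print(sess.run(loss))  # a tf.float32 scalar
```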
diff --git a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py
new file mode 100644
index 0000000000..c3a57ba51b
--- /dev/null
+++ b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py
@@ -0,0 +1,1031 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implements various metric learning losses."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import script_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.summary import summary
+try:
+ # pylint: disable=g-import-not-at-top
+ from sklearn import metrics
+ HAS_SKLEARN = True
+except ImportError:
+ HAS_SKLEARN = False
+
+
+def pairwise_distance(feature, squared=False):
+ """Computes the pairwise distance matrix with numerical stability.
+
+ output[i, j] = || feature[i, :] - feature[j, :] ||_2
+
+ Args:
+ feature: 2-D Tensor of size [number of data, feature dimension].
+ squared: Boolean, whether or not to square the pairwise distances.
+
+ Returns:
+ pairwise_distances: 2-D Tensor of size [number of data, number of data].
+ """
+ pairwise_distances_squared = math_ops.add(
+ math_ops.reduce_sum(
+ math_ops.square(feature),
+ axis=[1],
+ keep_dims=True),
+ math_ops.reduce_sum(
+ math_ops.square(
+ array_ops.transpose(feature)),
+ axis=[0],
+ keep_dims=True)) - 2.0 * math_ops.matmul(
+ feature, array_ops.transpose(feature))
+
+ # Deal with numerical inaccuracies. Set small negatives to zero.
+ pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared, 0.0)
+ # Get the mask where the zero distances are at.
+ error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)
+
+ # Optionally take the sqrt.
+ if squared:
+ pairwise_distances = pairwise_distances_squared
+ else:
+ pairwise_distances = math_ops.sqrt(
+ pairwise_distances_squared + math_ops.to_float(error_mask) * 1e-16)
+
+ # Undo conditionally adding 1e-16.
+ pairwise_distances = math_ops.multiply(
+ pairwise_distances, math_ops.to_float(math_ops.logical_not(error_mask)))
+
+ num_data = array_ops.shape(feature)[0]
+ # Explicitly set diagonals to zero.
+ mask_offdiagonals = array_ops.ones_like(pairwise_distances) - array_ops.diag(
+ array_ops.ones([num_data]))
+ pairwise_distances = math_ops.multiply(pairwise_distances, mask_offdiagonals)
+ return pairwise_distances
+
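For reference, the same `||x_i||^2 + ||x_j||^2 - 2 x_i . x_j` expansion in NumPy (an illustrative sketch only; the test file below carries the canonical `pairwise_distance_np`):

```python
import numpy as np

def pairwise_distance_ref(feature, squared=False):
  # ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * x_i . x_j
  sq_norms = np.sum(np.square(feature), axis=1)
  d2 = sq_norms[:, None] + sq_norms[None, :] - 2.0 * feature.dot(feature.T)
  d2 = np.maximum(d2, 0.0)  # clamp small negatives from round-off
  d = d2 if squared else np.sqrt(d2)
  np.fill_diagonal(d, 0.0)  # the diagonal is exactly zero by definition
  return d
```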
+
+def contrastive_loss(labels, embeddings_anchor, embeddings_positive,
+ margin=1.0):
+ """Computes the contrastive loss.
+
+  This loss encourages the embeddings to be close to each other for samples
+  of the same label, and to be at least the margin constant apart for
+  samples of different labels.
+ See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
+
+ Args:
+ labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
+ binary labels indicating positive vs negative pair.
+ embeddings_anchor: 2-D float `Tensor` of embedding vectors for the anchor
+ images. Embeddings should be l2 normalized.
+ embeddings_positive: 2-D float `Tensor` of embedding vectors for the
+ positive images. Embeddings should be l2 normalized.
+ margin: margin term in the loss definition.
+
+ Returns:
+ contrastive_loss: tf.float32 scalar.
+ """
+ # Get per pair distances
+ distances = math_ops.sqrt(
+ math_ops.reduce_sum(
+ math_ops.square(embeddings_anchor - embeddings_positive), 1))
+
+ # Add contrastive loss for the siamese network.
+ # label here is {0,1} for neg, pos.
+ return math_ops.reduce_mean(
+ math_ops.to_float(labels) * math_ops.square(distances) +
+ (1. - math_ops.to_float(labels)) *
+ math_ops.square(math_ops.maximum(margin - distances, 0.)),
+ name='contrastive_loss')
+
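The per-pair terms, spelled out in NumPy (illustrative values): positive pairs pay the squared distance, negative pairs pay a squared hinge on (margin - distance).

```python
import numpy as np

d = np.array([0.3, 1.4])   # distances: one positive pair, one negative pair
y = np.array([1.0, 0.0])   # 1 = same label, 0 = different labels
margin = 1.0
loss = np.mean(y * d**2 + (1 - y) * np.maximum(margin - d, 0.0)**2)
print(loss)  # ~0.045: the negative pair is beyond the margin, so it pays 0
```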
+
+def masked_maximum(data, mask, dim=1):
+ """Computes the axis wise maximum over chosen elements.
+
+ Args:
+ data: 2-D float `Tensor` of size [n, m].
+ mask: 2-D Boolean `Tensor` of size [n, m].
+ dim: The dimension over which to compute the maximum.
+
+ Returns:
+ masked_maximums: N-D `Tensor`.
+ The maximized dimension is of size 1 after the operation.
+ """
+ axis_minimums = math_ops.reduce_min(data, dim, keep_dims=True)
+ masked_maximums = math_ops.reduce_max(
+ math_ops.multiply(
+ data - axis_minimums, mask), dim, keep_dims=True) + axis_minimums
+ return masked_maximums
+
+
+def masked_minimum(data, mask, dim=1):
+ """Computes the axis wise minimum over chosen elements.
+
+ Args:
+ data: 2-D float `Tensor` of size [n, m].
+ mask: 2-D Boolean `Tensor` of size [n, m].
+ dim: The dimension over which to compute the minimum.
+
+ Returns:
+ masked_minimums: N-D `Tensor`.
+ The minimized dimension is of size 1 after the operation.
+ """
+ axis_maximums = math_ops.reduce_max(data, dim, keep_dims=True)
+ masked_minimums = math_ops.reduce_min(
+ math_ops.multiply(
+ data - axis_maximums, mask), dim, keep_dims=True) + axis_maximums
+ return masked_minimums
+
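Why the shift works, in NumPy (illustrative): after subtracting the row minimum every entry is non-negative, so multiplying by the mask zeroes out exactly the excluded entries and the subsequent max only sees the selected ones.

```python
import numpy as np

data = np.array([[-3.0, 5.0, 1.0]])
mask = np.array([[1.0, 0.0, 1.0]])   # select columns 0 and 2
row_min = data.min(axis=1, keepdims=True)
masked_max = ((data - row_min) * mask).max(axis=1, keepdims=True) + row_min
print(masked_max)  # [[1.]] == max(-3.0, 1.0), ignoring the masked-out 5.0
```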
+
+def triplet_semihard_loss(labels, embeddings, margin=1.0):
+ """Computes the triplet loss with semi-hard negative mining.
+
+  The loss encourages the positive distances (between a pair of embeddings
+  with the same labels) to be smaller than the minimum negative distance
+  among those negatives that are farther than the positive distance
+  (the semi-hard negatives) in the mini-batch. If no such negative exists,
+  the largest negative distance is used instead.
+ See: https://arxiv.org/abs/1503.03832.
+
+ Args:
+ labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
+ multiclass integer labels.
+ embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
+ be l2 normalized.
+ margin: Float, margin term in the loss definition.
+
+ Returns:
+ triplet_loss: tf.float32 scalar.
+ """
+ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
+ lshape = array_ops.shape(labels)
+ assert lshape.shape == 1
+ labels = array_ops.reshape(labels, [lshape[0], 1])
+
+ # Build pairwise squared distance matrix.
+ pdist_matrix = pairwise_distance(embeddings, squared=True)
+ # Build pairwise binary adjacency matrix.
+ adjacency = math_ops.equal(labels, array_ops.transpose(labels))
+ # Invert so we can select negatives only.
+ adjacency_not = math_ops.logical_not(adjacency)
+
+ batch_size = array_ops.size(labels)
+
+ # Compute the mask.
+ pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
+ mask = math_ops.logical_and(
+ array_ops.tile(adjacency_not, [batch_size, 1]),
+ math_ops.greater(
+ pdist_matrix_tile, array_ops.reshape(
+ array_ops.transpose(pdist_matrix), [-1, 1])))
+ mask_final = array_ops.reshape(
+ math_ops.greater(
+ math_ops.reduce_sum(
+ math_ops.cast(
+ mask, dtype=dtypes.float32), 1, keep_dims=True),
+ 0.0), [batch_size, batch_size])
+ mask_final = array_ops.transpose(mask_final)
+
+ adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
+ mask = math_ops.cast(mask, dtype=dtypes.float32)
+
+ # negatives_outside: smallest D_an where D_an > D_ap.
+ negatives_outside = array_ops.reshape(
+ masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
+ negatives_outside = array_ops.transpose(negatives_outside)
+
+ # negatives_inside: largest D_an.
+ negatives_inside = array_ops.tile(
+ masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
+ semi_hard_negatives = array_ops.where(
+ mask_final, negatives_outside, negatives_inside)
+
+ loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)
+
+ mask_positives = math_ops.cast(
+ adjacency, dtype=dtypes.float32) - array_ops.diag(
+ array_ops.ones([batch_size]))
+
+  # In lifted-struct, the authors multiply by 0.5 for the upper triangular;
+  # in semihard, they take all positive pairs except the diagonal.
+ num_positives = math_ops.reduce_sum(mask_positives)
+
+ triplet_loss = math_ops.truediv(
+ math_ops.reduce_sum(
+ math_ops.maximum(
+ math_ops.multiply(loss_mat, mask_positives), 0.0)),
+ num_positives,
+ name='triplet_semihard_loss')
+
+ return triplet_loss
+
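The negative selection rule, for a single anchor-positive pair in NumPy (illustrative): take the smallest negative distance that still exceeds the positive distance, falling back to the largest negative distance when none does.

```python
import numpy as np

margin = 1.0
d_ap = 0.5                          # squared anchor-positive distance
d_an = np.array([0.2, 0.7, 1.3])    # squared anchor-negative distances
outside = d_an[d_an > d_ap]         # candidates farther than the positive
chosen = outside.min() if outside.size else d_an.max()
loss_term = max(0.0, margin + d_ap - chosen)
print(chosen, loss_term)            # 0.7 0.8 (up to float round-off)
```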
+
+# pylint: disable=line-too-long
+def npairs_loss(labels, embeddings_anchor, embeddings_positive,
+ reg_lambda=0.002, print_losses=False):
+ """Computes the npairs loss.
+
+  Npairs loss expects paired data where a pair is composed of samples from
+  the same label and each pair in the minibatch has a different label. The
+  loss has two components. The first component is the L2 regularizer on the
+  embedding vectors. The second component is the cross entropy loss which
+  takes each row of the pair-wise similarity matrix as logits and the
+  remapped one-hot labels as labels.
+
+ See: http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf
+
+ Args:
+ labels: 1-D tf.int32 `Tensor` of shape [batch_size/2].
+ embeddings_anchor: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
+ embedding vectors for the anchor images. Embeddings should not be
+ l2 normalized.
+ embeddings_positive: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
+ embedding vectors for the positive images. Embeddings should not be
+ l2 normalized.
+ reg_lambda: Float. L2 regularization term on the embedding vectors.
+ print_losses: Boolean. Option to print the xent and l2loss.
+
+ Returns:
+ npairs_loss: tf.float32 scalar.
+ """
+ # pylint: enable=line-too-long
+ # Add the regularizer on the embedding.
+ reg_anchor = math_ops.reduce_mean(
+ math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
+ reg_positive = math_ops.reduce_mean(
+ math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
+ l2loss = math_ops.multiply(
+ 0.25 * reg_lambda, reg_anchor + reg_positive, name='l2loss')
+
+ # Get per pair similarities.
+ similarity_matrix = math_ops.matmul(
+ embeddings_anchor, embeddings_positive, transpose_a=False,
+ transpose_b=True)
+
+ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
+ lshape = array_ops.shape(labels)
+ assert lshape.shape == 1
+ labels = array_ops.reshape(labels, [lshape[0], 1])
+
+ labels_remapped = math_ops.to_float(
+ math_ops.equal(labels, array_ops.transpose(labels)))
+ labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keep_dims=True)
+
+ # Add the softmax loss.
+ xent_loss = nn.softmax_cross_entropy_with_logits(
+ logits=similarity_matrix, labels=labels_remapped)
+ xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')
+
+ if print_losses:
+ xent_loss = logging_ops.Print(
+ xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])
+
+ return l2loss + xent_loss
+
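The label remapping, in NumPy (illustrative): rows of the similarity matrix serve as logits, and the targets are the row-normalized label-equality matrix (one-hot when every label in the batch is unique).

```python
import numpy as np

labels = np.array([0, 1, 0])
targets = np.equal(labels[:, None], labels[None, :]).astype(np.float32)
targets /= targets.sum(axis=1, keepdims=True)
print(targets)
# [[0.5 0.  0.5]
#  [0.  1.  0. ]
#  [0.5 0.  0.5]]
```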
+
+def _build_multilabel_adjacency(sparse_labels):
+ """Builds multilabel adjacency matrix.
+
+  As of March 14th, 2017, there's no op for the dot product between
+  two sparse tensors in TF. However, there is a `sparse_minimum` op which is
+  equivalent to an AND op between two sparse boolean tensors.
+  This computes the dot product between two sparse boolean inputs.
+
+ Args:
+ sparse_labels: List of 1-D boolean sparse tensors.
+
+ Returns:
+ adjacency_matrix: 2-D dense `Tensor`.
+ """
+ num_pairs = len(sparse_labels)
+ adjacency_matrix = array_ops.zeros([num_pairs, num_pairs])
+ for i in range(num_pairs):
+ for j in range(num_pairs):
+ sparse_dot_product = math_ops.to_float(
+ sparse_ops.sparse_reduce_sum(sparse_ops.sparse_minimum(
+ sparse_labels[i], sparse_labels[j])))
+ sparse_dot_product = array_ops.expand_dims(sparse_dot_product, 0)
+ sparse_dot_product = array_ops.expand_dims(sparse_dot_product, 1)
+ one_hot_matrix = array_ops.pad(sparse_dot_product,
+ [[i, num_pairs-i-1],
+ [j, num_pairs-j-1]], 'CONSTANT')
+ adjacency_matrix += one_hot_matrix
+
+ return adjacency_matrix
+
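With dense 0/1 label matrices the same adjacency is just a dot product; a NumPy sketch (illustrative), mirroring what the multilabel test below computes:

```python
import numpy as np

labels = np.array([[1, 0, 1],
                   [0, 1, 1]])    # 2 pairs, 3 classes, multilabel indicators
adjacency = labels.dot(labels.T)  # entry [i, j] counts the shared labels
print(adjacency)
# [[2 1]
#  [1 2]]
```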
+
+def npairs_loss_multilabel(sparse_labels, embeddings_anchor,
+ embeddings_positive, reg_lambda=0.002,
+ print_losses=False):
+ r"""Computes the npairs loss with multilabel data.
+
+  Npairs loss expects paired data where a pair is composed of samples from
+  the same label and each pair in the minibatch has a different label. The
+  loss has two components. The first component is the L2 regularizer on the
+  embedding vectors. The second component is the cross entropy loss which
+  takes each row of the pair-wise similarity matrix as logits and the
+  remapped one-hot labels as labels. Here, the similarity is defined by the
+  dot product between two embedding vectors. S_{i,j} = f(x_i)^T f(x_j)
+
+  To deal with multilabel inputs, we use the count of label intersection,
+  i.e. L_{i,j} = | set_of_labels_for(i) \cap set_of_labels_for(j) |
+  Then we normalize each row of the count-based label matrix so that each
+  row sums to one.
+
+ Args:
+ sparse_labels: List of 1-D Boolean `SparseTensor` of dense_shape
+ [batch_size/2, num_classes] labels for the anchor-pos pairs.
+ embeddings_anchor: 2-D `Tensor` of shape [batch_size/2, embedding_dim] for
+ the embedding vectors for the anchor images. Embeddings should not be
+ l2 normalized.
+ embeddings_positive: 2-D `Tensor` of shape [batch_size/2, embedding_dim] for
+ the embedding vectors for the positive images. Embeddings should not be
+ l2 normalized.
+ reg_lambda: Float. L2 regularization term on the embedding vectors.
+ print_losses: Boolean. Option to print the xent and l2loss.
+
+ Returns:
+ npairs_loss: tf.float32 scalar.
+ Raises:
+ TypeError: When the specified sparse_labels is not a `SparseTensor`.
+ """
+  if not all(
+      isinstance(l, sparse_tensor.SparseTensor) for l in sparse_labels):
+    raise TypeError(
+        'sparse_labels must be a list of SparseTensors, but got %s' % str(
+            sparse_labels))
+
+ with ops.name_scope('NpairsLossMultiLabel'):
+ # Add the regularizer on the embedding.
+ reg_anchor = math_ops.reduce_mean(
+ math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
+ reg_positive = math_ops.reduce_mean(
+ math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
+ l2loss = math_ops.multiply(0.25 * reg_lambda,
+ reg_anchor + reg_positive, name='l2loss')
+
+ # Get per pair similarities.
+ similarity_matrix = math_ops.matmul(
+ embeddings_anchor, embeddings_positive, transpose_a=False,
+ transpose_b=True)
+
+    # TODO(coreylynch): need to check that the sparse values
+    # are composed only of 0's and 1's.
+
+ multilabel_adjacency_matrix = _build_multilabel_adjacency(sparse_labels)
+ labels_remapped = math_ops.to_float(multilabel_adjacency_matrix)
+ labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keep_dims=True)
+
+ # Add the softmax loss.
+ xent_loss = nn.softmax_cross_entropy_with_logits(
+ logits=similarity_matrix, labels=labels_remapped)
+ xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')
+
+ if print_losses:
+ xent_loss = logging_ops.Print(
+ xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])
+
+ return l2loss + xent_loss
+
+
+def lifted_struct_loss(labels, embeddings, margin=1.0):
+ """Computes the lifted structured loss.
+
+ The loss encourages the positive distances (between a pair of embeddings
+ with the same labels) to be smaller than any negative distances (between a
+ pair of embeddings with different labels) in the mini-batch in a way
+ that is differentiable with respect to the embedding vectors.
+ See: https://arxiv.org/abs/1511.06452.
+
+ Args:
+ labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
+ multiclass integer labels.
+ embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
+ be l2 normalized.
+ margin: Float, margin term in the loss definition.
+
+ Returns:
+ lifted_loss: tf.float32 scalar.
+ """
+ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
+ lshape = array_ops.shape(labels)
+ assert lshape.shape == 1
+ labels = array_ops.reshape(labels, [lshape[0], 1])
+
+ # Build pairwise squared distance matrix.
+ pairwise_distances = pairwise_distance(embeddings)
+
+ # Build pairwise binary adjacency matrix.
+ adjacency = math_ops.equal(labels, array_ops.transpose(labels))
+ # Invert so we can select negatives only.
+ adjacency_not = math_ops.logical_not(adjacency)
+
+ batch_size = array_ops.size(labels)
+
+ diff = margin - pairwise_distances
+ mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
+  # Safe maximum: Temporarily shift negative distances
+  # above zero before taking max.
+  # This is to take the max only among negatives.
+ row_minimums = math_ops.reduce_min(diff, 1, keep_dims=True)
+ row_negative_maximums = math_ops.reduce_max(
+ math_ops.multiply(
+ diff - row_minimums, mask), 1, keep_dims=True) + row_minimums
+
+ # Compute the loss.
+ # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
+ # where m_i is the max of alpha - negative D_i's.
+ # This matches the Caffe loss layer implementation at:
+ # https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp # pylint: disable=line-too-long
+
+ max_elements = math_ops.maximum(
+ row_negative_maximums, array_ops.transpose(row_negative_maximums))
+ diff_tiled = array_ops.tile(diff, [batch_size, 1])
+ mask_tiled = array_ops.tile(mask, [batch_size, 1])
+ max_elements_vect = array_ops.reshape(
+ array_ops.transpose(max_elements), [-1, 1])
+
+ loss_exp_left = array_ops.reshape(
+ math_ops.reduce_sum(math_ops.multiply(
+ math_ops.exp(
+ diff_tiled - max_elements_vect),
+ mask_tiled), 1, keep_dims=True), [batch_size, batch_size])
+
+ loss_mat = max_elements + math_ops.log(
+ loss_exp_left + array_ops.transpose(loss_exp_left))
+ # Add the positive distance.
+ loss_mat += pairwise_distances
+
+ mask_positives = math_ops.cast(
+ adjacency, dtype=dtypes.float32) - array_ops.diag(
+ array_ops.ones([batch_size]))
+
+ # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
+ num_positives = math_ops.reduce_sum(mask_positives) / 2.0
+
+ lifted_loss = math_ops.truediv(
+ 0.25 * math_ops.reduce_sum(
+ math_ops.square(
+ math_ops.maximum(
+ math_ops.multiply(loss_mat, mask_positives), 0.0))),
+ num_positives,
+ name='liftedstruct_loss')
+ return lifted_loss
+
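The max_elements bookkeeping above is a stable soft maximum; the same max-shifted log-sum-exp in NumPy (illustrative values):

```python
import numpy as np

negs = np.array([0.9, 0.2, -0.4])  # margin - D_an over a row's negatives
m = negs.max()
soft_max = m + np.log(np.sum(np.exp(negs - m)))  # never overflows in exp
print(soft_max)  # ~1.47, a smooth upper bound on max(negs)
```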
+
+def update_1d_tensor(y, index, value):
+ """Updates 1d tensor y so that y[index] = value.
+
+ Args:
+ y: 1-D Tensor.
+ index: index of y to modify.
+ value: new value to write at y[index].
+
+ Returns:
+ y_mod: 1-D Tensor. Tensor y after the update.
+ """
+ value = array_ops.squeeze(value)
+  # Modify the 1-D tensor y at index with value.
+  # E.g. chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, best_medoid)
+ y_before = array_ops.slice(y, [0], [index])
+ y_after = array_ops.slice(y, [index + 1], [-1])
+ y_mod = array_ops.concat([y_before, [value], y_after], 0)
+ return y_mod
+
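The same update in NumPy (illustrative): graph-mode tensors are immutable, so y[index] = value is expressed as a slice-and-concat.

```python
import numpy as np

y, index, value = np.array([4, 5, 6]), 1, 9
y_mod = np.concatenate([y[:index], [value], y[index + 1:]])
print(y_mod)  # [4 9 6]
```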
+
+def get_cluster_assignment(pairwise_distances, centroid_ids):
+ """Assign data points to the neareset centroids.
+
+ Tensorflow has numerical instability and doesn't always choose
+ the data point with theoretically zero distance as it's nearest neighbor.
+ Thus, for each centroid in centroid_ids, explicitly assign
+ the centroid itself as the nearest centroid.
+ This is done through the mask tensor and the constraint_vect tensor.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ centroid_ids: 1-D Tensor of centroid indices.
+
+ Returns:
+ y_fixed: 1-D tensor of cluster assignment.
+ """
+ predictions = math_ops.argmin(
+ array_ops.gather(pairwise_distances, centroid_ids), dimension=0)
+ batch_size = array_ops.shape(pairwise_distances)[0]
+
+ # Deal with numerical instability
+ mask = math_ops.reduce_any(array_ops.one_hot(
+ centroid_ids, batch_size, True, False, axis=-1, dtype=dtypes.bool),
+ axis=0)
+ constraint_one_hot = math_ops.multiply(
+ array_ops.one_hot(centroid_ids,
+ batch_size,
+ array_ops.constant(1, dtype=dtypes.int64),
+ array_ops.constant(0, dtype=dtypes.int64),
+ axis=0,
+ dtype=dtypes.int64),
+ math_ops.to_int64(math_ops.range(array_ops.shape(centroid_ids)[0])))
+ constraint_vect = math_ops.reduce_sum(
+ array_ops.transpose(constraint_one_hot), axis=0)
+
+ y_fixed = array_ops.where(mask, constraint_vect, predictions)
+ return y_fixed
+
+
+def compute_facility_energy(pairwise_distances, centroid_ids):
+ """Compute the average travel distance to the assigned centroid.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ centroid_ids: 1-D Tensor of indices.
+
+ Returns:
+ facility_energy: dtypes.float32 scalar.
+ """
+ return -1.0 * math_ops.reduce_sum(
+ math_ops.reduce_min(
+ array_ops.gather(pairwise_distances, centroid_ids), axis=0))
+
+
+def compute_clustering_score(labels, predictions, margin_type):
+ """Computes the clustering score via sklearn.metrics functions.
+
+  There are various ways to compute the clustering score. Intuitively,
+  we want to measure the agreement of two clustering assignments (labels vs
+  predictions) ignoring the permutations and output a score from zero to one,
+  where values close to one indicate significant agreement.
+  This code supports the following scoring functions:
+    nmi: normalized mutual information
+    ami: adjusted mutual information
+    ari: adjusted Rand index
+    vmeasure: v-measure
+    const: indicator checking whether the two clusterings are the same.
+  See http://scikit-learn.org/stable/modules/classes.html#clustering-metrics
+  for the detailed descriptions.
+ Args:
+ labels: 1-D Tensor. ground truth cluster assignment.
+ predictions: 1-D Tensor. predicted cluster assignment.
+ margin_type: Type of structured margin to use. Default is nmi.
+ Returns:
+ clustering_score: dtypes.float32 scalar.
+ The possible valid values are from zero to one.
+ Zero means the worst clustering and one means the perfect clustering.
+ Raises:
+ ValueError: margin_type is not recognized.
+ """
+ margin_type_to_func = {
+ 'nmi': _compute_nmi_score,
+ 'ami': _compute_ami_score,
+ 'ari': _compute_ari_score,
+ 'vmeasure': _compute_vmeasure_score,
+ 'const': _compute_zeroone_score
+ }
+
+ if margin_type not in margin_type_to_func:
+ raise ValueError('Unrecognized margin_type: %s' % margin_type)
+ clustering_score_fn = margin_type_to_func[margin_type]
+ return array_ops.squeeze(clustering_score_fn(labels, predictions))
+
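The scorers bottom out in sklearn.metrics; for intuition (illustrative values), all of them are invariant to permuting the cluster ids:

```python
from sklearn import metrics

labels = [0, 0, 1, 1]
predictions = [1, 1, 0, 0]  # the same partition with ids swapped
print(metrics.normalized_mutual_info_score(labels, predictions))  # 1.0
print(metrics.adjusted_rand_score(labels, predictions))           # 1.0
```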
+
+def _compute_nmi_score(labels, predictions):
+ return math_ops.to_float(
+ script_ops.py_func(
+ metrics.normalized_mutual_info_score, [labels, predictions],
+ [dtypes.float64],
+ name='nmi'))
+
+
+def _compute_ami_score(labels, predictions):
+ ami_score = math_ops.to_float(
+ script_ops.py_func(
+ metrics.adjusted_mutual_info_score, [labels, predictions],
+ [dtypes.float64],
+ name='ami'))
+ return math_ops.maximum(0.0, ami_score)
+
+
+def _compute_ari_score(labels, predictions):
+ ari_score = math_ops.to_float(
+ script_ops.py_func(
+ metrics.adjusted_rand_score, [labels, predictions], [dtypes.float64],
+ name='ari'))
+ # ari score can go below 0
+ # http://scikit-learn.org/stable/modules/clustering.html#adjusted-rand-score
+ return math_ops.maximum(0.0, ari_score)
+
+
+def _compute_vmeasure_score(labels, predictions):
+ vmeasure_score = math_ops.to_float(
+ script_ops.py_func(
+ metrics.v_measure_score, [labels, predictions], [dtypes.float64],
+ name='vmeasure'))
+ return math_ops.maximum(0.0, vmeasure_score)
+
+
+def _compute_zeroone_score(labels, predictions):
+ zeroone_score = math_ops.to_float(
+ math_ops.equal(
+ math_ops.reduce_sum(
+ math_ops.to_int32(math_ops.equal(labels, predictions))),
+ array_ops.shape(labels)[0]))
+ return zeroone_score
+
+
+def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids,
+ candidate_ids, margin_multiplier,
+ margin_type):
+ """Find the next centroid that maximizes the loss augmented inference.
+
+  This function is a subroutine called from compute_augmented_facility_locations.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ labels: 1-D Tensor of ground truth cluster assignment.
+ chosen_ids: 1-D Tensor of current centroid indices.
+ candidate_ids: 1-D Tensor of candidate indices.
+ margin_multiplier: multiplication constant.
+ margin_type: Type of structured margin to use. Default is nmi.
+
+ Returns:
+ integer index.
+ """
+ num_candidates = array_ops.shape(candidate_ids)[0]
+
+ pairwise_distances_chosen = array_ops.gather(pairwise_distances, chosen_ids)
+ pairwise_distances_candidate = array_ops.gather(
+ pairwise_distances, candidate_ids)
+ pairwise_distances_chosen_tile = array_ops.tile(
+ pairwise_distances_chosen, [1, num_candidates])
+
+ candidate_scores = -1.0 * math_ops.reduce_sum(
+ array_ops.reshape(
+ math_ops.reduce_min(
+ array_ops.concat([
+ pairwise_distances_chosen_tile,
+ array_ops.reshape(pairwise_distances_candidate, [1, -1])
+ ], 0),
+ axis=0,
+ keep_dims=True), [num_candidates, -1]),
+ axis=1)
+
+ nmi_scores = array_ops.zeros([num_candidates])
+ iteration = array_ops.constant(0)
+
+ def func_cond(iteration, nmi_scores):
+ del nmi_scores # Unused in func_cond()
+ return iteration < num_candidates
+
+ def func_body(iteration, nmi_scores):
+ predictions = get_cluster_assignment(
+ pairwise_distances,
+ array_ops.concat([chosen_ids, [candidate_ids[iteration]]], 0))
+ nmi_score_i = compute_clustering_score(labels, predictions, margin_type)
+ pad_before = array_ops.zeros([iteration])
+ pad_after = array_ops.zeros([num_candidates - 1 - iteration])
+    # Return 1 - NMI score as the structured loss,
+    # because higher NMI (in [0, 1]) is better.
+ return iteration + 1, nmi_scores + array_ops.concat(
+ [pad_before, [1.0 - nmi_score_i], pad_after], 0)
+
+ _, nmi_scores = control_flow_ops.while_loop(
+ func_cond, func_body, [iteration, nmi_scores])
+
+ candidate_scores = math_ops.add(
+ candidate_scores, margin_multiplier * nmi_scores)
+
+ argmax_index = math_ops.to_int32(
+ math_ops.argmax(candidate_scores, dimension=0))
+
+ return candidate_ids[argmax_index]
+
+
+def compute_augmented_facility_locations(pairwise_distances, labels, all_ids,
+ margin_multiplier, margin_type):
+ """Computes the centroid locations.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ labels: 1-D Tensor of ground truth cluster assignment.
+ all_ids: 1-D Tensor of all data indices.
+ margin_multiplier: multiplication constant.
+ margin_type: Type of structured margin to use. Default is nmi.
+
+ Returns:
+ chosen_ids: 1-D Tensor of chosen centroid indices.
+ """
+
+ def func_cond_augmented(iteration, chosen_ids):
+ del chosen_ids # Unused argument in func_cond_augmented.
+ return iteration < num_classes
+
+ def func_body_augmented(iteration, chosen_ids):
+ # find a new facility location to add
+ # based on the clustering score and the NMI score
+ candidate_ids = array_ops.setdiff1d(all_ids, chosen_ids)[0]
+ new_chosen_idx = _find_loss_augmented_facility_idx(pairwise_distances,
+ labels, chosen_ids,
+ candidate_ids,
+ margin_multiplier,
+ margin_type)
+ chosen_ids = array_ops.concat([chosen_ids, [new_chosen_idx]], 0)
+ return iteration + 1, chosen_ids
+
+ num_classes = array_ops.size(array_ops.unique(labels)[0])
+ chosen_ids = array_ops.constant(0, dtype=dtypes.int32, shape=[0])
+
+  # num_classes gets determined at run time based on the sampled batch.
+ iteration = array_ops.constant(0)
+
+ _, chosen_ids = control_flow_ops.while_loop(
+ func_cond_augmented,
+ func_body_augmented, [iteration, chosen_ids],
+ shape_invariants=[iteration.get_shape(), tensor_shape.TensorShape(
+ [None])])
+ return chosen_ids
+
+
+def update_medoid_per_cluster(pairwise_distances, pairwise_distances_subset,
+ labels, chosen_ids, cluster_member_ids,
+ cluster_idx, margin_multiplier, margin_type):
+ """Updates the cluster medoid per cluster.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ pairwise_distances_subset: 2-D Tensor of pairwise distances for one cluster.
+ labels: 1-D Tensor of ground truth cluster assignment.
+ chosen_ids: 1-D Tensor of cluster centroid indices.
+ cluster_member_ids: 1-D Tensor of cluster member indices for one cluster.
+ cluster_idx: Index of this one cluster.
+ margin_multiplier: multiplication constant.
+ margin_type: Type of structured margin to use. Default is nmi.
+
+ Returns:
+ chosen_ids: Updated 1-D Tensor of cluster centroid indices.
+ """
+
+ def func_cond(iteration, scores_margin):
+ del scores_margin # Unused variable scores_margin.
+ return iteration < num_candidates
+
+ def func_body(iteration, scores_margin):
+ # swap the current medoid with the candidate cluster member
+ candidate_medoid = math_ops.to_int32(cluster_member_ids[iteration])
+ tmp_chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, candidate_medoid)
+ predictions = get_cluster_assignment(pairwise_distances, tmp_chosen_ids)
+ metric_score = compute_clustering_score(labels, predictions, margin_type)
+ pad_before = array_ops.zeros([iteration])
+ pad_after = array_ops.zeros([num_candidates - 1 - iteration])
+ return iteration + 1, scores_margin + array_ops.concat(
+ [pad_before, [1.0 - metric_score], pad_after], 0)
+
+  # pairwise_distances_subset is of size [p, 1, 1, p]; the intermediate
+  # dummy dimensions at [1, 2] make this code work in the edge case where
+  # p = 1, i.e. when the cluster size is one.
+ scores_fac = -1.0 * math_ops.reduce_sum(
+ array_ops.squeeze(pairwise_distances_subset, [1, 2]), axis=0)
+
+ iteration = array_ops.constant(0)
+ num_candidates = array_ops.size(cluster_member_ids)
+ scores_margin = array_ops.zeros([num_candidates])
+
+ _, scores_margin = control_flow_ops.while_loop(func_cond, func_body,
+ [iteration, scores_margin])
+ candidate_scores = math_ops.add(scores_fac, margin_multiplier * scores_margin)
+
+ argmax_index = math_ops.to_int32(
+ math_ops.argmax(candidate_scores, dimension=0))
+
+ best_medoid = math_ops.to_int32(cluster_member_ids[argmax_index])
+ chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, best_medoid)
+ return chosen_ids
+
+
+def update_all_medoids(pairwise_distances, predictions, labels, chosen_ids,
+ margin_multiplier, margin_type):
+ """Updates all cluster medoids a cluster at a time.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ predictions: 1-D Tensor of predicted cluster assignment.
+ labels: 1-D Tensor of ground truth cluster assignment.
+ chosen_ids: 1-D Tensor of cluster centroid indices.
+ margin_multiplier: multiplication constant.
+ margin_type: Type of structured margin to use. Default is nmi.
+
+ Returns:
+ chosen_ids: Updated 1-D Tensor of cluster centroid indices.
+ """
+
+ def func_cond_augmented_pam(iteration, chosen_ids):
+ del chosen_ids # Unused argument.
+ return iteration < num_classes
+
+ def func_body_augmented_pam(iteration, chosen_ids):
+ """Call the update_medoid_per_cluster subroutine."""
+ mask = math_ops.equal(
+ math_ops.to_int64(predictions), math_ops.to_int64(iteration))
+ this_cluster_ids = array_ops.where(mask)
+
+ pairwise_distances_subset = array_ops.transpose(
+ array_ops.gather(
+ array_ops.transpose(
+ array_ops.gather(pairwise_distances, this_cluster_ids)),
+ this_cluster_ids))
+
+ chosen_ids = update_medoid_per_cluster(pairwise_distances,
+ pairwise_distances_subset, labels,
+ chosen_ids, this_cluster_ids,
+ iteration, margin_multiplier,
+ margin_type)
+ return iteration + 1, chosen_ids
+
+ unique_class_ids = array_ops.unique(labels)[0]
+ num_classes = array_ops.size(unique_class_ids)
+ iteration = array_ops.constant(0)
+
+ _, chosen_ids = control_flow_ops.while_loop(
+ func_cond_augmented_pam, func_body_augmented_pam, [iteration, chosen_ids])
+ return chosen_ids
+
+
+def compute_augmented_facility_locations_pam(pairwise_distances,
+ labels,
+ margin_multiplier,
+ margin_type,
+ chosen_ids,
+ pam_max_iter=5):
+ """Refine the cluster centroids with PAM local search.
+
+ For fixed iterations, alternate between updating the cluster assignment
+ and updating cluster medoids.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ labels: 1-D Tensor of ground truth cluster assignment.
+ margin_multiplier: multiplication constant.
+ margin_type: Type of structured margin to use. Default is nmi.
+ chosen_ids: 1-D Tensor of initial estimate of cluster centroids.
+ pam_max_iter: Number of refinement iterations.
+
+ Returns:
+ chosen_ids: Updated 1-D Tensor of cluster centroid indices.
+ """
+ for _ in range(pam_max_iter):
+ # update the cluster assignment given the chosen_ids (S_pred)
+ predictions = get_cluster_assignment(pairwise_distances, chosen_ids)
+
+ # update the medoids per each cluster
+ chosen_ids = update_all_medoids(pairwise_distances, predictions, labels,
+ chosen_ids, margin_multiplier, margin_type)
+
+ return chosen_ids
+
+
+def compute_gt_cluster_score(pairwise_distances, labels):
+ """Compute ground truth facility location score.
+
+ Loop over each unique classes and compute average travel distances.
+
+ Args:
+ pairwise_distances: 2-D Tensor of pairwise distances.
+ labels: 1-D Tensor of ground truth cluster assignment.
+
+ Returns:
+ gt_cluster_score: dtypes.float32 score.
+ """
+ unique_class_ids = array_ops.unique(labels)[0]
+ num_classes = array_ops.size(unique_class_ids)
+ iteration = array_ops.constant(0)
+ gt_cluster_score = array_ops.constant(0.0, dtype=dtypes.float32)
+
+ def func_cond(iteration, gt_cluster_score):
+ del gt_cluster_score # Unused argument.
+ return iteration < num_classes
+
+ def func_body(iteration, gt_cluster_score):
+ """Per each cluster, compute the average travel distance."""
+ mask = math_ops.equal(labels, unique_class_ids[iteration])
+ this_cluster_ids = array_ops.where(mask)
+ pairwise_distances_subset = array_ops.transpose(
+ array_ops.gather(
+ array_ops.transpose(
+ array_ops.gather(pairwise_distances, this_cluster_ids)),
+ this_cluster_ids))
+ this_cluster_score = -1.0 * math_ops.reduce_min(
+ math_ops.reduce_sum(
+ pairwise_distances_subset, axis=0))
+ return iteration + 1, gt_cluster_score + this_cluster_score
+
+ _, gt_cluster_score = control_flow_ops.while_loop(
+ func_cond, func_body, [iteration, gt_cluster_score])
+ return gt_cluster_score
+
+
+def cluster_loss(labels,
+ embeddings,
+ margin_multiplier,
+ enable_pam_finetuning=True,
+ margin_type='nmi',
+ print_losses=False):
+ """Computes the clustering loss.
+
+ The following structured margins are supported:
+ nmi: normalized mutual information
+ ami: adjusted mutual information
+    ari: adjusted Rand index
+ vmeasure: v-measure
+ const: indicator checking whether the two clusterings are the same.
+
+ Args:
+ labels: 2-D Tensor of labels of shape [batch size, 1]
+ embeddings: 2-D Tensor of embeddings of shape
+ [batch size, embedding dimension]. Embeddings should be l2 normalized.
+ margin_multiplier: float32 scalar. multiplier on the structured margin term
+ See section 3.2 of paper for discussion.
+ enable_pam_finetuning: Boolean, Whether to run local pam refinement.
+ See section 3.4 of paper for discussion.
+ margin_type: Type of structured margin to use. See section 3.2 of
+ paper for discussion. Can be 'nmi', 'ami', 'ari', 'vmeasure', 'const'.
+ print_losses: Boolean. Option to print the loss.
+
+ Paper: https://arxiv.org/abs/1612.01213.
+
+ Returns:
+ clustering_loss: A float32 scalar `Tensor`.
+ Raises:
+ ImportError: If sklearn dependency is not installed.
+ """
+ if not HAS_SKLEARN:
+ raise ImportError('Cluster loss depends on sklearn.')
+ pairwise_distances = pairwise_distance(embeddings)
+ labels = array_ops.squeeze(labels)
+ all_ids = math_ops.range(array_ops.shape(embeddings)[0])
+
+ # Compute the loss augmented inference and get the cluster centroids.
+ chosen_ids = compute_augmented_facility_locations(pairwise_distances, labels,
+ all_ids, margin_multiplier,
+ margin_type)
+ # Given the predicted centroids, compute the clustering score.
+ score_pred = compute_facility_energy(pairwise_distances, chosen_ids)
+
+ # Branch whether to use PAM finetuning.
+ if enable_pam_finetuning:
+ # Initialize with augmented facility solution.
+ chosen_ids = compute_augmented_facility_locations_pam(pairwise_distances,
+ labels,
+ margin_multiplier,
+ margin_type,
+ chosen_ids)
+ score_pred = compute_facility_energy(pairwise_distances, chosen_ids)
+
+ # Given the predicted centroids, compute the cluster assignments.
+ predictions = get_cluster_assignment(pairwise_distances, chosen_ids)
+
+ # Compute the clustering (i.e. NMI) score between the two assignments.
+ clustering_score_pred = compute_clustering_score(labels, predictions,
+ margin_type)
+
+ # Compute the clustering score from labels.
+ score_gt = compute_gt_cluster_score(pairwise_distances, labels)
+
+ # Compute the hinge loss.
+ clustering_loss = math_ops.maximum(
+ score_pred + margin_multiplier * (1.0 - clustering_score_pred) - score_gt,
+ 0.0,
+ name='clustering_loss')
+ clustering_loss.set_shape([])
+
+ if print_losses:
+ clustering_loss = logging_ops.Print(
+ clustering_loss,
+ ['clustering_loss: ', clustering_loss, array_ops.shape(
+ clustering_loss)])
+
+ # Clustering specific summary.
+ summary.scalar('losses/score_pred', score_pred)
+ summary.scalar('losses/' + margin_type, clustering_score_pred)
+ summary.scalar('losses/score_gt', score_gt)
+
+ return clustering_loss
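A minimal usage sketch for cluster_loss (assuming TF 1.x graph mode with sklearn installed; note the [batch_size, 1] label shape, unlike the other losses):

```python
import tensorflow as tf

labels = tf.constant([[0], [0], [1], [1]], dtype=tf.int32)  # [batch, 1]
embeddings = tf.nn.l2_normalize(tf.random_normal([4, 16]), dim=1)

loss = tf.contrib.losses.metric_learning.cluster_loss(
    labels=labels, embeddings=embeddings, margin_multiplier=10.0)
```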
diff --git a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py
new file mode 100644
index 0000000000..4ec539ab42
--- /dev/null
+++ b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py
@@ -0,0 +1,562 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for triplet_semihard_loss."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from tensorflow.contrib.losses.python import metric_learning as metric_loss_ops
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.platform import test
+try:
+ # pylint: disable=g-import-not-at-top
+ from sklearn import datasets
+ from sklearn import metrics
+ HAS_SKLEARN = True
+except ImportError:
+ HAS_SKLEARN = False
+
+
+def pairwise_distance_np(feature, squared=False):
+ """Computes the pairwise distance matrix in numpy.
+
+ Args:
+ feature: 2-D numpy array of size [number of data, feature dimension]
+ squared: Boolean. If true, output is the pairwise squared euclidean
+ distance matrix; else, output is the pairwise euclidean distance matrix.
+
+ Returns:
+ pairwise_distances: 2-D numpy array of size
+ [number of data, number of data].
+ """
+ triu = np.triu_indices(feature.shape[0], 1)
+ upper_tri_pdists = np.linalg.norm(feature[triu[1]] - feature[triu[0]], axis=1)
+ if squared:
+ upper_tri_pdists **= 2.
+ num_data = feature.shape[0]
+ pairwise_distances = np.zeros((num_data, num_data))
+ pairwise_distances[np.triu_indices(num_data, 1)] = upper_tri_pdists
+ # Make symmetrical.
+ pairwise_distances = pairwise_distances + pairwise_distances.T - np.diag(
+ pairwise_distances.diagonal())
+ return pairwise_distances
+
+
+class ContrastiveLossTest(test.TestCase):
+
+ def testContrastive(self):
+ with self.test_session():
+ num_data = 10
+ feat_dim = 6
+ margin = 1.0
+
+ embeddings_anchor = np.random.rand(num_data, feat_dim).astype(np.float32)
+ embeddings_positive = np.random.rand(num_data, feat_dim).astype(
+ np.float32)
+ labels = np.random.randint(0, 2, size=(num_data,)).astype(np.float32)
+
+ # Compute the loss in NP
+ dist = np.sqrt(
+ np.sum(np.square(embeddings_anchor - embeddings_positive), axis=1))
+ loss_np = np.mean(
+ labels * np.square(dist) +
+ (1.0 - labels) * np.square(np.maximum(margin - dist, 0.0)))
+ # Compute the loss with TF
+ loss_tf = metric_loss_ops.contrastive_loss(
+ labels=ops.convert_to_tensor(labels),
+ embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
+ embeddings_positive=ops.convert_to_tensor(embeddings_positive),
+ margin=margin)
+ loss_tf = loss_tf.eval()
+ self.assertAllClose(loss_np, loss_tf)
+
+
+class TripletSemiHardLossTest(test.TestCase):
+
+ def testTripletSemiHard(self):
+ with self.test_session():
+ num_data = 10
+ feat_dim = 6
+ margin = 1.0
+ num_classes = 4
+
+ embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
+ labels = np.random.randint(
+ 0, num_classes, size=(num_data)).astype(np.float32)
+
+ # Reshape labels to compute adjacency matrix.
+ labels_reshaped = np.reshape(labels, (labels.shape[0], 1))
+ # Compute the loss in NP.
+ adjacency = np.equal(labels_reshaped, labels_reshaped.T)
+
+ pdist_matrix = pairwise_distance_np(embedding, squared=True)
+ loss_np = 0.0
+ num_positives = 0.0
+ for i in range(num_data):
+ for j in range(num_data):
+ if adjacency[i][j] > 0.0 and i != j:
+ num_positives += 1.0
+
+ pos_distance = pdist_matrix[i][j]
+ neg_distances = []
+
+ for k in range(num_data):
+ if adjacency[i][k] == 0:
+ neg_distances.append(pdist_matrix[i][k])
+
+ # Sort by distance.
+ neg_distances.sort()
+ chosen_neg_distance = neg_distances[0]
+
+ for l in range(len(neg_distances)):
+ chosen_neg_distance = neg_distances[l]
+ if chosen_neg_distance > pos_distance:
+ break
+
+ loss_np += np.maximum(
+ 0.0, margin - chosen_neg_distance + pos_distance)
+
+ loss_np /= num_positives
+
+ # Compute the loss in TF.
+ loss_tf = metric_loss_ops.triplet_semihard_loss(
+ labels=ops.convert_to_tensor(labels),
+ embeddings=ops.convert_to_tensor(embedding),
+ margin=margin)
+ loss_tf = loss_tf.eval()
+ self.assertAllClose(loss_np, loss_tf)
+
+
+class LiftedStructLossTest(test.TestCase):
+
+ def testLiftedStruct(self):
+ with self.test_session():
+ num_data = 10
+ feat_dim = 6
+ margin = 1.0
+ num_classes = 4
+
+ embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
+ labels = np.random.randint(
+ 0, num_classes, size=(num_data)).astype(np.float32)
+ # Reshape labels to compute adjacency matrix.
+ labels_reshaped = np.reshape(labels, (labels.shape[0], 1))
+
+ # Compute the loss in NP
+ adjacency = np.equal(labels_reshaped, labels_reshaped.T)
+ pdist_matrix = pairwise_distance_np(embedding)
+ loss_np = 0.0
+ num_constraints = 0.0
+ for i in range(num_data):
+ for j in range(num_data):
+ if adjacency[i][j] > 0.0 and i != j:
+ d_pos = pdist_matrix[i][j]
+ negs = []
+ for k in range(num_data):
+ if not adjacency[i][k]:
+ negs.append(margin - pdist_matrix[i][k])
+ for l in range(num_data):
+ if not adjacency[j][l]:
+ negs.append(margin - pdist_matrix[j][l])
+
+ negs = np.array(negs)
+ max_elem = np.max(negs)
+ negs -= max_elem
+ negs = np.exp(negs)
+ soft_maximum = np.log(np.sum(negs)) + max_elem
+
+ num_constraints += 1.0
+ this_loss = max(soft_maximum + d_pos, 0)
+ loss_np += this_loss * this_loss
+
+ loss_np = loss_np / num_constraints / 2.0
+
+ # Compute the loss in TF
+ loss_tf = metric_loss_ops.lifted_struct_loss(
+ labels=ops.convert_to_tensor(labels),
+ embeddings=ops.convert_to_tensor(embedding),
+ margin=margin)
+ loss_tf = loss_tf.eval()
+ self.assertAllClose(loss_np, loss_tf)
+
+
+def convert_to_list_of_sparse_tensor(np_matrix):
+ list_of_sparse_tensors = []
+ nrows, ncols = np_matrix.shape
+ for i in range(nrows):
+ sp_indices = []
+ for j in range(ncols):
+ if np_matrix[i][j] == 1:
+ sp_indices.append([j])
+
+ num_non_zeros = len(sp_indices)
+ list_of_sparse_tensors.append(sparse_tensor.SparseTensor(
+ indices=np.array(sp_indices),
+ values=np.ones((num_non_zeros,)),
+ dense_shape=np.array([ncols,])))
+
+ return list_of_sparse_tensors
+
+
+class NpairsLossTest(test.TestCase):
+
+ def testNpairs(self):
+ with self.test_session():
+ num_data = 15
+ feat_dim = 6
+ num_classes = 5
+ reg_lambda = 0.02
+
+ embeddings_anchor = np.random.rand(num_data, feat_dim).astype(np.float32)
+ embeddings_positive = np.random.rand(num_data, feat_dim).astype(
+ np.float32)
+
+ labels = np.random.randint(
+ 0, num_classes, size=(num_data)).astype(np.float32)
+ # Reshape labels to compute adjacency matrix.
+ labels_reshaped = np.reshape(labels, (labels.shape[0], 1))
+
+ # Compute the loss in NP
+ reg_term = np.mean(np.sum(np.square(embeddings_anchor), 1))
+ reg_term += np.mean(np.sum(np.square(embeddings_positive), 1))
+ reg_term *= 0.25 * reg_lambda
+
+ similarity_matrix = np.matmul(embeddings_anchor, embeddings_positive.T)
+
+ labels_remapped = np.equal(
+ labels_reshaped, labels_reshaped.T).astype(np.float32)
+ labels_remapped /= np.sum(labels_remapped, axis=1, keepdims=True)
+
+ xent_loss = math_ops.reduce_mean(nn.softmax_cross_entropy_with_logits(
+ logits=ops.convert_to_tensor(similarity_matrix),
+ labels=ops.convert_to_tensor(labels_remapped))).eval()
+ loss_np = xent_loss + reg_term
+
+ # Compute the loss in TF
+ loss_tf = metric_loss_ops.npairs_loss(
+ labels=ops.convert_to_tensor(labels),
+ embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
+ embeddings_positive=ops.convert_to_tensor(embeddings_positive),
+ reg_lambda=reg_lambda)
+ loss_tf = loss_tf.eval()
+ self.assertAllClose(loss_np, loss_tf)
+
+
+class NpairsLossMultiLabelTest(test.TestCase):
+
+ def testNpairsMultiLabelLossWithSingleLabelEqualsNpairsLoss(self):
+ with self.test_session():
+ num_data = 15
+ feat_dim = 6
+ reg_lambda = 0.02
+
+ embeddings_anchor = np.random.rand(num_data, feat_dim).astype(np.float32)
+ embeddings_positive = np.random.rand(num_data, feat_dim).astype(
+ np.float32)
+ labels = np.arange(num_data)
+ labels = np.reshape(labels, -1)
+
+      # Compute vanilla npairs loss.
+ loss_npairs = metric_loss_ops.npairs_loss(
+ labels=ops.convert_to_tensor(labels),
+ embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
+ embeddings_positive=ops.convert_to_tensor(embeddings_positive),
+ reg_lambda=reg_lambda).eval()
+
+ # Compute npairs multilabel loss.
+ labels_one_hot = np.identity(num_data)
+ loss_npairs_multilabel = metric_loss_ops.npairs_loss_multilabel(
+ sparse_labels=convert_to_list_of_sparse_tensor(labels_one_hot),
+ embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
+ embeddings_positive=ops.convert_to_tensor(embeddings_positive),
+ reg_lambda=reg_lambda).eval()
+
+ self.assertAllClose(loss_npairs, loss_npairs_multilabel)
+
+ def testNpairsMultiLabel(self):
+ with self.test_session():
+ num_data = 15
+ feat_dim = 6
+ num_classes = 10
+ reg_lambda = 0.02
+
+ embeddings_anchor = np.random.rand(num_data, feat_dim).astype(np.float32)
+ embeddings_positive = np.random.rand(num_data, feat_dim).astype(
+ np.float32)
+
+ labels = np.random.randint(0, 2, (num_data, num_classes))
+      # Set the last column to one so that each row has at least one bit set.
+ labels[:, -1] = 1
+
+ # Compute the loss in NP
+ reg_term = np.mean(np.sum(np.square(embeddings_anchor), 1))
+ reg_term += np.mean(np.sum(np.square(embeddings_positive), 1))
+ reg_term *= 0.25 * reg_lambda
+
+ similarity_matrix = np.matmul(embeddings_anchor, embeddings_positive.T)
+
+ labels_remapped = np.dot(labels, labels.T).astype(np.float)
+ labels_remapped /= np.sum(labels_remapped, 1, keepdims=True)
+
+ xent_loss = math_ops.reduce_mean(nn.softmax_cross_entropy_with_logits(
+ logits=ops.convert_to_tensor(similarity_matrix),
+ labels=ops.convert_to_tensor(labels_remapped))).eval()
+ loss_np = xent_loss + reg_term
+
+ # Compute the loss in TF
+ loss_tf = metric_loss_ops.npairs_loss_multilabel(
+ sparse_labels=convert_to_list_of_sparse_tensor(labels),
+ embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
+ embeddings_positive=ops.convert_to_tensor(embeddings_positive),
+ reg_lambda=reg_lambda)
+ loss_tf = loss_tf.eval()
+
+ self.assertAllClose(loss_np, loss_tf)
+
+
+def compute_ground_truth_cluster_score(feat, y):
+ y_unique = np.unique(y)
+ score_gt_np = 0.0
+ for c in y_unique:
+ feat_subset = feat[y == c, :]
+ pdist_subset = pairwise_distance_np(feat_subset)
+ score_gt_np += -1.0 * np.min(np.sum(pdist_subset, axis=0))
+ score_gt_np = score_gt_np.astype(np.float32)
+ return score_gt_np
+
+
+def compute_cluster_loss_numpy(feat,
+ y,
+ margin_multiplier=1.0,
+ enable_pam_finetuning=True):
+ if enable_pam_finetuning:
+ facility = ForwardGreedyFacility(
+ n_clusters=np.unique(y).size).pam_augmented_fit(feat, y,
+ margin_multiplier)
+ else:
+ facility = ForwardGreedyFacility(
+ n_clusters=np.unique(y).size).loss_augmented_fit(feat, y,
+ margin_multiplier)
+
+ score_augmented = facility.score_aug_
+ score_gt = compute_ground_truth_cluster_score(feat, y)
+ return np.maximum(np.float32(0.0), score_augmented - score_gt)
+
+
+class ForwardGreedyFacility(object):
+
+ def __init__(self, n_clusters=8):
+ self.n_clusters = n_clusters
+ self.center_ics_ = None
+
+ def _check_init_args(self):
+ # Check n_clusters.
+ if (self.n_clusters is None or self.n_clusters <= 0 or
+ not isinstance(self.n_clusters, int)):
+      raise ValueError('n_clusters has to be a positive integer.')
+
+ def loss_augmented_fit(self, feat, y, loss_mult):
+ """Fit K-Medoids to the provided data."""
+ self._check_init_args()
+    # Check that the array is compatible with the requested
+    # number of clusters.
+ feat = self._check_array(feat)
+ # Apply distance metric to get the distance matrix.
+ pdists = pairwise_distance_np(feat)
+
+ num_data = feat.shape[0]
+ candidate_ids = list(range(num_data))
+ candidate_scores = np.zeros(num_data,)
+ subset = []
+
+ k = 0
+ while k < self.n_clusters:
+ candidate_scores = []
+ for i in candidate_ids:
+ # push i to subset.
+ subset.append(i)
+ marginal_cost = -1.0 * np.sum(np.min(pdists[:, subset], axis=1))
+ loss = 1.0 - metrics.normalized_mutual_info_score(
+ y, self._get_cluster_ics(pdists, subset))
+ candidate_scores.append(marginal_cost + loss_mult * loss)
+ # remove i from subset.
+ subset.pop()
+
+ # push i_star to subset.
+ i_star = candidate_ids[np.argmax(candidate_scores)]
+ subset.append(i_star)
+ # remove i_star from candidate indices.
+ candidate_ids.remove(i_star)
+ k += 1
+
+ # Expose labels_ which are the assignments of
+ # the training data to clusters.
+ self.labels_ = self._get_cluster_ics(pdists, subset)
+ # Expose cluster centers, i.e. medoids.
+ self.cluster_centers_ = feat.take(subset, axis=0)
+ # Expose indices of chosen cluster centers.
+ self.center_ics_ = subset
+ # Expose the score = -\sum_{i \in V} min_{j \in S} || x_i - x_j ||
+ self.score_ = np.float32(-1.0) * self._get_facility_distance(pdists, subset)
+ self.score_aug_ = self.score_ + loss_mult * (
+ 1.0 - metrics.normalized_mutual_info_score(
+ y, self._get_cluster_ics(pdists, subset)))
+ self.score_aug_ = self.score_aug_.astype(np.float32)
+ # Expose the chosen cluster indices.
+ self.subset_ = subset
+ return self
+
+ def _augmented_update_medoid_ics_in_place(self, pdists, y_gt, cluster_ics,
+ medoid_ics, loss_mult):
+ for cluster_idx in range(self.n_clusters):
+ # y_pred = self._get_cluster_ics(D, medoid_ics)
+ # Don't prematurely do the assignment step.
+ # Do this after we've updated all cluster medoids.
+ y_pred = cluster_ics
+
+ if sum(y_pred == cluster_idx) == 0:
+ # Cluster is empty.
+ continue
+
+ curr_score = (
+ -1.0 * np.sum(
+ pdists[medoid_ics[cluster_idx], y_pred == cluster_idx]) +
+ loss_mult * (1.0 - metrics.normalized_mutual_info_score(
+ y_gt, y_pred)))
+
+ pdist_in = pdists[y_pred == cluster_idx, :]
+ pdist_in = pdist_in[:, y_pred == cluster_idx]
+
+ all_scores_fac = np.sum(-1.0 * pdist_in, axis=1)
+ all_scores_loss = []
+ for i in range(y_pred.size):
+ if y_pred[i] != cluster_idx:
+ continue
+ # remove this cluster's current centroid
+ medoid_ics_i = medoid_ics[:cluster_idx] + medoid_ics[cluster_idx + 1:]
+ # add this new candidate to the centroid list
+ medoid_ics_i += [i]
+ y_pred_i = self._get_cluster_ics(pdists, medoid_ics_i)
+ all_scores_loss.append(loss_mult * (
+ 1.0 - metrics.normalized_mutual_info_score(y_gt, y_pred_i)))
+
+ all_scores = all_scores_fac + all_scores_loss
+ max_score_idx = np.argmax(all_scores)
+ max_score = all_scores[max_score_idx]
+
+ if max_score > curr_score:
+ medoid_ics[cluster_idx] = np.where(
+ y_pred == cluster_idx)[0][max_score_idx]
+
+ def pam_augmented_fit(self, feat, y, loss_mult):
+ pam_max_iter = 5
+ self._check_init_args()
+ feat = self._check_array(feat)
+ pdists = pairwise_distance_np(feat)
+ self.loss_augmented_fit(feat, y, loss_mult)
+ print('PAM -1 (before PAM): score: %f, score_aug: %f' % (
+ self.score_, self.score_aug_))
+ # Initialize from loss augmented facility location
+ subset = self.center_ics_
+ for iter_ in range(pam_max_iter):
+ # update the cluster assignment
+ cluster_ics = self._get_cluster_ics(pdists, subset)
+      # update the medoid for each cluster
+ self._augmented_update_medoid_ics_in_place(pdists, y, cluster_ics, subset,
+ loss_mult)
+ self.score_ = np.float32(-1.0) * self._get_facility_distance(
+ pdists, subset)
+ self.score_aug_ = self.score_ + loss_mult * (
+ 1.0 - metrics.normalized_mutual_info_score(
+ y, self._get_cluster_ics(pdists, subset)))
+ self.score_aug_ = self.score_aug_.astype(np.float32)
+ print('PAM iter: %d, score: %f, score_aug: %f' % (iter_, self.score_,
+ self.score_aug_))
+
+ self.center_ics_ = subset
+ self.labels_ = cluster_ics
+ return self
+
+ def _check_array(self, feat):
+ # Check that the number of clusters is less than or equal to
+ # the number of samples
+ if self.n_clusters > feat.shape[0]:
+      raise ValueError('The number of medoids ({}) must not be larger than '
+                       'the number of samples ({}).'.format(
+                           self.n_clusters, feat.shape[0]))
+ return feat
+
+ def _get_cluster_ics(self, pdists, subset):
+ """Returns cluster indices for pdist and current medoid indices."""
+    # Assign each data point to the cluster whose medoid
+    # yields the smallest distance.
+ cluster_ics = np.argmin(pdists[subset, :], axis=0)
+ return cluster_ics
+
+ def _get_facility_distance(self, pdists, subset):
+ return np.sum(np.min(pdists[subset, :], axis=0))
+
+
+class ClusterLossTest(test.TestCase):
+
+ def _genClusters(self, n_samples, n_clusters):
+ blobs = datasets.make_blobs(
+ n_samples=n_samples, centers=n_clusters)
+ embedding, labels = blobs
+ embedding = (embedding - embedding.mean(axis=0)) / embedding.std(axis=0)
+ embedding = embedding.astype(np.float32)
+ return embedding, labels
+
+ def testClusteringLossPAMOff(self):
+ if not HAS_SKLEARN:
+ return
+ with self.test_session():
+ margin_multiplier = 10.0
+ embeddings, labels = self._genClusters(n_samples=128, n_clusters=64)
+
+ loss_np = compute_cluster_loss_numpy(
+ embeddings, labels, margin_multiplier, enable_pam_finetuning=False)
+ loss_tf = metric_loss_ops.cluster_loss(
+ labels=ops.convert_to_tensor(labels),
+ embeddings=ops.convert_to_tensor(embeddings),
+ margin_multiplier=margin_multiplier,
+ enable_pam_finetuning=False)
+ loss_tf = loss_tf.eval()
+ self.assertAllClose(loss_np, loss_tf)
+
+ def testClusteringLossPAMOn(self):
+ if not HAS_SKLEARN:
+ return
+ with self.test_session():
+ margin_multiplier = 10.0
+ embeddings, labels = self._genClusters(n_samples=128, n_clusters=64)
+
+ loss_np = compute_cluster_loss_numpy(
+ embeddings, labels, margin_multiplier, enable_pam_finetuning=True)
+ loss_tf = metric_loss_ops.cluster_loss(
+ labels=ops.convert_to_tensor(labels),
+ embeddings=ops.convert_to_tensor(embeddings),
+ margin_multiplier=margin_multiplier,
+ enable_pam_finetuning=True)
+ loss_tf = loss_tf.eval()
+ self.assertAllClose(loss_np, loss_tf)
+
+if __name__ == '__main__':
+ test.main()