diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-10-01 19:56:47 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-10-01 19:59:48 -0700 |
commit | 350388fca9cb9509962ff393a9d21fb2879c9179 (patch) | |
tree | b1613b611d5ed4f657a3bda89702d259c7639ec4 /tensorflow/contrib/tpu | |
parent | 991f06fd50fc73285ce415d57f720994c2b2e861 (diff) |
Add mode_override to the TPU embedding enqueue ops. This allows the mode to be
overridden at runtime, allowing dynamic switching between inference and training
modes. This feature is not fully implemented yet.
PiperOrigin-RevId: 215325071
Diffstat (limited to 'tensorflow/contrib/tpu')
-rw-r--r-- | tensorflow/contrib/tpu/BUILD | 3 | ||||
-rw-r--r-- | tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc | 52 | ||||
-rw-r--r-- | tensorflow/contrib/tpu/python/ops/tpu_ops.py | 148 |
3 files changed, 186 insertions, 17 deletions
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 0c4bdab191..10ed1c2891 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -135,6 +135,9 @@ tf_gen_op_wrapper_py( name = "tpu_ops", hidden = [ "SendTPUEmbeddingGradients", + "EnqueueTPUEmbeddingIntegerBatch", + "EnqueueTPUEmbeddingSparseBatch", + "EnqueueTPUEmbeddingSparseTensorBatch", ], deps = [ ":cross_replica_ops_op_lib", diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc index ef2f8dd36d..0ef29bdf73 100644 --- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc +++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc @@ -335,7 +335,6 @@ void RegisterPerTableLoadAndRetrieveOps() { tpu::GradientAccumulationSupport grad_accum_support; TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support)); if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) { - // TODO(gkurian): Condition this on being used internally within Google. OpRegistry::Global()->Register( [alg](OpRegistrationData* op_reg_data) -> Status { return RegisterPerTableLoadOpsForAlgorithmBody(alg, true, @@ -353,7 +352,6 @@ void RegisterPerTableLoadAndRetrieveOps() { tpu::GradientAccumulationSupport grad_accum_support; TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support)); if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) { - // TODO(gkurian): Condition this on being used internally within Google. 
OpRegistry::Global()->Register( [alg](OpRegistrationData* op_reg_data) -> Status { return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true, @@ -366,7 +364,7 @@ void RegisterPerTableLoadAndRetrieveOps() { } // namespace REGISTER_OP("RecvTPUEmbeddingActivations") - .Output("outputs: num_outputs * float") + .Output("outputs: num_outputs * float32") .Attr("num_outputs: int >= 1") .Attr("config: string") .SetIsStateful() @@ -476,7 +474,8 @@ config: Serialized TPUEmbeddingConfiguration proto. REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch") .Input("batch: N * int32") - .Attr("N: int") + .Input("mode_override: string") + .Attr("N: int >= 1") .Attr("device_ordinal: int = -1") .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape) @@ -485,6 +484,10 @@ An op that enqueues a list of input batch tensors to TPUEmbedding. batch: A list of 1D tensors, one for each embedding table, containing the indices into the tables. +mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', + 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set + in TPUEmbeddingConfiguration is used, otherwise mode_override is used. device_ordinal: The TPU device to use. Should be >= 0 and less than the number of TPU cores in the task on which the node is placed. )doc"); @@ -493,7 +496,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseBatch") .Input("sample_indices: N * int32") .Input("embedding_indices: N * int32") .Input("aggregation_weights: N * float32") - .Attr("N: int") + .Input("mode_override: string") + .Attr("N: int >= 1") .Attr("device_ordinal: int = -1") .Attr("combiners: list(string) = []") .SetIsStateful() @@ -523,14 +527,18 @@ The tensors at corresponding positions in the three input lists must have the same shape, i.e. rank 1 with dim_size() equal to the total number of lookups into the table described by the corresponding table_id. 
-sample_indices: A list of Rank 1 Tensors specifying the training example and +sample_indices: A list of rank 1 Tensors specifying the training example and feature to which the corresponding embedding_indices and aggregation_weights values belong. sample_indices[i] must equal b * nf + f, where nf is the number of features from the corresponding table, f is in [0, nf), and b is in [0, batch size). -embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables. -aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per +embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. +aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per (training example, feature) -- aggregation weights. +mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', + 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set + in TPUEmbeddingConfiguration is used, otherwise mode_override is used. device_ordinal: The TPU device to use. Should be >= 0 and less than the number of TPU cores in the task on which the node is placed. combiners: A list of string scalars, one for each embedding table that specify @@ -545,7 +553,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch") .Input("sample_indices: N * int32") .Input("embedding_indices: N * int32") .Input("aggregation_weights: N * float32") - .Attr("N: int") + .Input("mode_override: string") + .Attr("N: int >= 1") .Attr("device_ordinal: int = -1") .Attr("combiners: list(string) = []") .Attr("table_ids: list(int)") @@ -555,7 +564,7 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch") This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse(). sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond -to ith feature. table_ids[i] indicates which embedding table to look up ith +to the ith feature. 
table_ids[i] indicates which embedding table to look up ith feature. The tensors at corresponding positions in the three input lists (sample_indices, @@ -563,12 +572,18 @@ embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1 with dim_size() equal to the total number of lookups into the table described by the corresponding feature. -sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in +sample_indices: A list of rank 1 Tensors specifying the training example to + which the corresponding embedding_indices and aggregation_weights values + belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse(). +embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. + It corresponds to sp_ids.values in embedding_lookup_sparse(). +aggregation_weights: A list of rank 1 Tensors containing per training example + aggregation weights. It corresponds to sp_weights.values in embedding_lookup_sparse(). -embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values - in embedding_lookup_sparse(). -aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values - in embedding_lookup_sparse(). +mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', + 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set + in TPUEmbeddingConfiguration is used, otherwise mode_override is used. device_ordinal: The TPU device to use. Should be >= 0 and less than the number of TPU cores in the task on which the node is placed. combiners: A list of string scalars, one for each embedding table that specify @@ -577,8 +592,11 @@ combiners: A list of string scalars, one for each embedding table that specify the sum of the weights be 0 for 'mean' or the sum of the squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for all tables. 
-table_ids: A list of int. table_ids[i] indicates which embedding table to look - up ith feature in the list. +table_ids: A list of integers specifying the identifier of the embedding table + (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the + corresponding input. The ith input is looked up using table_ids[i]. The size + of the table_ids list must be equal to that of sample_indices, + embedding_indices and aggregation_weights. )doc"); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py index e2e4acadab..968adccf2b 100644 --- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py +++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py @@ -227,6 +227,154 @@ if platform.system() != "Windows": inputs=inputs, learning_rates=learning_rates, config=config, name=name) + send_tpu_embedding_gradients.__doc__ = ( + gen_tpu_ops._send_tpu_embedding_gradients.__doc__) + + # pylint: disable=protected-access + def enqueue_tpu_embedding_integer_batch(batch, + device_ordinal, + mode_override=None, + name=None): + """A placeholder op for enqueueing embedding IDs to the TPU. + + Args: + batch: A list of 1D tensors, one for each embedding table, containing the + indices into the tables. + device_ordinal: The TPU device to use. Should be >= 0 and less than the + number of TPU cores in the task on which the node is placed. + mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', + 'inference', 'training', 'backward_pass_only'}. When set to + 'unspecified', the mode set in TPUEmbeddingConfiguration is used, + otherwise mode_override is used (optional). + name: A name for the operation (optional). + + Returns: + An EnqueueTPUEmbeddingIntegerBatch operation. 
+ """ + if mode_override is None: + mode_override = "unspecified" + return gen_tpu_ops._enqueue_tpu_embedding_integer_batch( + batch=batch, + device_ordinal=device_ordinal, + mode_override=mode_override, + name=name) + + enqueue_tpu_embedding_integer_batch.__doc__ = ( + gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__) + + # pylint: disable=protected-access + def enqueue_tpu_embedding_sparse_batch(sample_indices, + embedding_indices, + aggregation_weights, + device_ordinal, + combiners=None, + mode_override=None, + name=None): + """A placeholder op for enqueueing embedding IDs to the TPU. + + Args: + sample_indices: A list of rank 1 Tensors specifying the training example + and feature to which the corresponding embedding_indices and + aggregation_weights values belong. sample_indices[i] must equal b * nf + + f, where nf is the number of features from the corresponding table, f is + in [0, nf), and b is in [0, batch size). + embedding_indices: A list of rank 1 Tensors, indices into the embedding + tables. + aggregation_weights: A list of rank 1 Tensors containing per sample -- + i.e. per (training example, feature) -- aggregation weights. + device_ordinal: The TPU device to use. Should be >= 0 and less than the + number of TPU cores in the task on which the node is placed. + combiners: A list of string scalars, one for each embedding table that + specify how to normalize the embedding activations after weighted + summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is + invalid to have the sum of the weights be 0 for 'mean' or the sum of the + squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default + is to use 'sum' for all tables (optional). + mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', + 'inference', 'training', 'backward_pass_only'}. 
When set to + 'unspecified', the mode set in TPUEmbeddingConfiguration is used, + otherwise mode_override is used (optional). + name: A name for the operation (optional). + + Returns: + An EnqueueTPUEmbeddingSparseBatch operation. + """ + if mode_override is None: + mode_override = "unspecified" + return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch( + sample_indices=sample_indices, + embedding_indices=embedding_indices, + aggregation_weights=aggregation_weights, + device_ordinal=device_ordinal, + combiners=combiners, + mode_override=mode_override, + name=name) + + enqueue_tpu_embedding_sparse_batch.__doc__ = ( + gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__) + + # pylint: disable=protected-access + def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices, + embedding_indices, + aggregation_weights, + table_ids, + device_ordinal, + combiners=None, + mode_override=None, + name=None): + """A placeholder op for enqueueing embedding IDs to the TPU. + + Args: + sample_indices: A list of rank 1 Tensors specifying the training example + to which the corresponding embedding_indices and aggregation_weights + values + belong. It corresponds to sp_ids.indices[:,0] in + embedding_lookup_sparse(). + embedding_indices: A list of rank 1 Tensors, indices into the embedding + tables. It corresponds to sp_ids.values in embedding_lookup_sparse(). + aggregation_weights: A list of rank 1 Tensors containing per training + example aggregation weights. It corresponds to sp_weights.values in + embedding_lookup_sparse(). + table_ids: A list of integers specifying the identifier of the embedding + table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to + lookup the corresponding input. The ith input is looked up using + table_ids[i]. The size of the table_ids list must be equal to that of + sample_indices, embedding_indices and aggregation_weights. + device_ordinal: The TPU device to use. 
Should be >= 0 and less than the + number of TPU cores in the task on which the node is placed. + combiners: A list of string scalars, one for each embedding table that + specify how to normalize the embedding activations after weighted + summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is + invalid to have the sum of the weights be 0 for 'mean' or the sum of the + squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default + is to use 'sum' for all tables (optional). + mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', + 'inference', 'training', 'backward_pass_only'}. When set to + 'unspecified', the mode set in TPUEmbeddingConfiguration is used, + otherwise mode_override is used (optional). + name: A name for the operation (optional). + + Returns: + An EnqueueTPUEmbeddingSparseTensorBatch operation. + """ + if mode_override is None: + mode_override = "unspecified" + return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch( + sample_indices=sample_indices, + embedding_indices=embedding_indices, + aggregation_weights=aggregation_weights, + table_ids=table_ids, + device_ordinal=device_ordinal, + combiners=combiners, + mode_override=mode_override, + name=name) + + enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = ( + gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__) + else: # We have already built the appropriate libraries into the binary via CMake # if we have built contrib, so we don't need this |