about summary refs log tree commit diff homepage
path: root/tensorflow/contrib/tpu
diff options
context:
space:
mode:
author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-10-01 19:56:47 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2018-10-01 19:59:48 -0700
commit    350388fca9cb9509962ff393a9d21fb2879c9179 (patch)
tree      b1613b611d5ed4f657a3bda89702d259c7639ec4 /tensorflow/contrib/tpu
parent    991f06fd50fc73285ce415d57f720994c2b2e861 (diff)
Add mode_override to the TPU embedding enqueue ops. This allows the mode to be overridden at runtime, allowing dynamic switching between inference and training modes. Not fully implemented yet.

PiperOrigin-RevId: 215325071
Diffstat (limited to 'tensorflow/contrib/tpu')
-rw-r--r--  tensorflow/contrib/tpu/BUILD                        |   3
-rw-r--r--  tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc     |  52
-rw-r--r--  tensorflow/contrib/tpu/python/ops/tpu_ops.py        | 148
3 files changed, 186 insertions, 17 deletions
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 0c4bdab191..10ed1c2891 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -135,6 +135,9 @@ tf_gen_op_wrapper_py(
name = "tpu_ops",
hidden = [
"SendTPUEmbeddingGradients",
+ "EnqueueTPUEmbeddingIntegerBatch",
+ "EnqueueTPUEmbeddingSparseBatch",
+ "EnqueueTPUEmbeddingSparseTensorBatch",
],
deps = [
":cross_replica_ops_op_lib",
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index ef2f8dd36d..0ef29bdf73 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -335,7 +335,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
tpu::GradientAccumulationSupport grad_accum_support;
TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
- // TODO(gkurian): Condition this on being used internally within Google.
OpRegistry::Global()->Register(
[alg](OpRegistrationData* op_reg_data) -> Status {
return RegisterPerTableLoadOpsForAlgorithmBody(alg, true,
@@ -353,7 +352,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
tpu::GradientAccumulationSupport grad_accum_support;
TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
- // TODO(gkurian): Condition this on being used internally within Google.
OpRegistry::Global()->Register(
[alg](OpRegistrationData* op_reg_data) -> Status {
return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true,
@@ -366,7 +364,7 @@ void RegisterPerTableLoadAndRetrieveOps() {
} // namespace
REGISTER_OP("RecvTPUEmbeddingActivations")
- .Output("outputs: num_outputs * float")
+ .Output("outputs: num_outputs * float32")
.Attr("num_outputs: int >= 1")
.Attr("config: string")
.SetIsStateful()
@@ -476,7 +474,8 @@ config: Serialized TPUEmbeddingConfiguration proto.
REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
.Input("batch: N * int32")
- .Attr("N: int")
+ .Input("mode_override: string")
+ .Attr("N: int >= 1")
.Attr("device_ordinal: int = -1")
.SetIsStateful()
.SetShapeFn(shape_inference::UnknownShape)
@@ -485,6 +484,10 @@ An op that enqueues a list of input batch tensors to TPUEmbedding.
batch: A list of 1D tensors, one for each embedding table, containing the
indices into the tables.
+mode_override: A string input that overrides the mode specified in the
+ TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+ 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+ in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
device_ordinal: The TPU device to use. Should be >= 0 and less than the number
of TPU cores in the task on which the node is placed.
)doc");
@@ -493,7 +496,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
.Input("sample_indices: N * int32")
.Input("embedding_indices: N * int32")
.Input("aggregation_weights: N * float32")
- .Attr("N: int")
+ .Input("mode_override: string")
+ .Attr("N: int >= 1")
.Attr("device_ordinal: int = -1")
.Attr("combiners: list(string) = []")
.SetIsStateful()
@@ -523,14 +527,18 @@ The tensors at corresponding positions in the three input lists
must have the same shape, i.e. rank 1 with dim_size() equal to the total
number of lookups into the table described by the corresponding table_id.
-sample_indices: A list of Rank 1 Tensors specifying the training example and
+sample_indices: A list of rank 1 Tensors specifying the training example and
feature to which the corresponding embedding_indices and aggregation_weights
values belong. sample_indices[i] must equal b * nf + f, where nf is the
number of features from the corresponding table, f is in [0, nf), and
b is in [0, batch size).
-embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables.
-aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per
(training example, feature) -- aggregation weights.
+mode_override: A string input that overrides the mode specified in the
+ TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+ 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+ in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
device_ordinal: The TPU device to use. Should be >= 0 and less than the number
of TPU cores in the task on which the node is placed.
combiners: A list of string scalars, one for each embedding table that specify
@@ -545,7 +553,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
.Input("sample_indices: N * int32")
.Input("embedding_indices: N * int32")
.Input("aggregation_weights: N * float32")
- .Attr("N: int")
+ .Input("mode_override: string")
+ .Attr("N: int >= 1")
.Attr("device_ordinal: int = -1")
.Attr("combiners: list(string) = []")
.Attr("table_ids: list(int)")
@@ -555,7 +564,7 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse().
sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
-to ith feature. table_ids[i] indicates which embedding table to look up ith
+to the ith feature. table_ids[i] indicates which embedding table to look up ith
feature.
The tensors at corresponding positions in the three input lists (sample_indices,
@@ -563,12 +572,18 @@ embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
with dim_size() equal to the total number of lookups into the table described by
the corresponding feature.
-sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in
+sample_indices: A list of rank 1 Tensors specifying the training example to
+ which the corresponding embedding_indices and aggregation_weights values
+ belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse().
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+ It corresponds to sp_ids.values in embedding_lookup_sparse().
+aggregation_weights: A list of rank 1 Tensors containing per training example
+ aggregation weights. It corresponds to sp_weights.values in
embedding_lookup_sparse().
-embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values
- in embedding_lookup_sparse().
-aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values
- in embedding_lookup_sparse().
+mode_override: A string input that overrides the mode specified in the
+ TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+ 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+ in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
device_ordinal: The TPU device to use. Should be >= 0 and less than the number
of TPU cores in the task on which the node is placed.
combiners: A list of string scalars, one for each embedding table that specify
@@ -577,8 +592,11 @@ combiners: A list of string scalars, one for each embedding table that specify
the sum of the weights be 0 for 'mean' or the sum of the squared weights be
0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
all tables.
-table_ids: A list of int. table_ids[i] indicates which embedding table to look
- up ith feature in the list.
+table_ids: A list of integers specifying the identifier of the embedding table
+ (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the
+ corresponding input. The ith input is looked up using table_ids[i]. The size
+ of the table_ids list must be equal to that of sample_indices,
+ embedding_indices and aggregation_weights.
)doc");
} // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
index e2e4acadab..968adccf2b 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
@@ -227,6 +227,154 @@ if platform.system() != "Windows":
inputs=inputs, learning_rates=learning_rates, config=config, name=name)
+ send_tpu_embedding_gradients.__doc__ = (
+ gen_tpu_ops._send_tpu_embedding_gradients.__doc__)
+
+ # pylint: disable=protected-access
+ def enqueue_tpu_embedding_integer_batch(batch,
+ device_ordinal,
+ mode_override=None,
+ name=None):
+ """A placeholder op for enqueueing embedding IDs to the TPU.
+
+ Args:
+ batch: A list of 1D tensors, one for each embedding table, containing the
+ indices into the tables.
+ device_ordinal: The TPU device to use. Should be >= 0 and less than the
+ number of TPU cores in the task on which the node is placed.
+ mode_override: A string input that overrides the mode specified in the
+ TPUEmbeddingConfiguration. Supported values are {'unspecified',
+ 'inference', 'training', 'backward_pass_only'}. When set to
+ 'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+ otherwise mode_override is used (optional).
+ name: A name for the operation (optional).
+
+ Returns:
+ An EnqueueTPUEmbeddingIntegerBatch operation.
+ """
+ if mode_override is None:
+ mode_override = "unspecified"
+ return gen_tpu_ops._enqueue_tpu_embedding_integer_batch(
+ batch=batch,
+ device_ordinal=device_ordinal,
+ mode_override=mode_override,
+ name=name)
+
+ enqueue_tpu_embedding_integer_batch.__doc__ = (
+ gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__)
+
+ # pylint: disable=protected-access
+ def enqueue_tpu_embedding_sparse_batch(sample_indices,
+ embedding_indices,
+ aggregation_weights,
+ device_ordinal,
+ combiners=None,
+ mode_override=None,
+ name=None):
+ """A placeholder op for enqueueing embedding IDs to the TPU.
+
+ Args:
+ sample_indices: A list of rank 1 Tensors specifying the training example
+ and feature to which the corresponding embedding_indices and
+ aggregation_weights values belong. sample_indices[i] must equal b * nf +
+ f, where nf is the number of features from the corresponding table, f is
+ in [0, nf), and b is in [0, batch size).
+ embedding_indices: A list of rank 1 Tensors, indices into the embedding
+ tables.
+ aggregation_weights: A list of rank 1 Tensors containing per sample --
+ i.e. per (training example, feature) -- aggregation weights.
+ device_ordinal: The TPU device to use. Should be >= 0 and less than the
+ number of TPU cores in the task on which the node is placed.
+ combiners: A list of string scalars, one for each embedding table that
+ specify how to normalize the embedding activations after weighted
+ summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+ invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+ squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+ is to use 'sum' for all tables (optional).
+ mode_override: A string input that overrides the mode specified in the
+ TPUEmbeddingConfiguration. Supported values are {'unspecified',
+ 'inference', 'training', 'backward_pass_only'}. When set to
+ 'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+ otherwise mode_override is used (optional).
+ name: A name for the operation (optional).
+
+ Returns:
+ An EnqueueTPUEmbeddingSparseBatch operation.
+ """
+ if mode_override is None:
+ mode_override = "unspecified"
+ return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch(
+ sample_indices=sample_indices,
+ embedding_indices=embedding_indices,
+ aggregation_weights=aggregation_weights,
+ device_ordinal=device_ordinal,
+ combiners=combiners,
+ mode_override=mode_override,
+ name=name)
+
+ enqueue_tpu_embedding_sparse_batch.__doc__ = (
+ gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__)
+
+ # pylint: disable=protected-access
+ def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices,
+ embedding_indices,
+ aggregation_weights,
+ table_ids,
+ device_ordinal,
+ combiners=None,
+ mode_override=None,
+ name=None):
+ """A placeholder op for enqueueing embedding IDs to the TPU.
+
+ Args:
+ sample_indices: A list of rank 1 Tensors specifying the training example
+ to which the corresponding embedding_indices and aggregation_weights
+ values
+ belong. It corresponds to sp_ids.indices[:,0] in
+ embedding_lookup_sparse().
+ embedding_indices: A list of rank 1 Tensors, indices into the embedding
+ tables. It corresponds to sp_ids.values in embedding_lookup_sparse().
+ aggregation_weights: A list of rank 1 Tensors containing per training
+ example aggregation weights. It corresponds to sp_weights.values in
+ embedding_lookup_sparse().
+ table_ids: A list of integers specifying the identifier of the embedding
+ table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to
+ lookup the corresponding input. The ith input is looked up using
+ table_ids[i]. The size of the table_ids list must be equal to that of
+ sample_indices, embedding_indices and aggregation_weights.
+ device_ordinal: The TPU device to use. Should be >= 0 and less than the
+ number of TPU cores in the task on which the node is placed.
+ combiners: A list of string scalars, one for each embedding table that
+ specify how to normalize the embedding activations after weighted
+ summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+ invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+ squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+ is to use 'sum' for all tables (optional).
+ mode_override: A string input that overrides the mode specified in the
+ TPUEmbeddingConfiguration. Supported values are {'unspecified',
+ 'inference', 'training', 'backward_pass_only'}. When set to
+ 'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+ otherwise mode_override is used (optional).
+ name: A name for the operation (optional).
+
+ Returns:
+ An EnqueueTPUEmbeddingSparseTensorBatch operation.
+ """
+ if mode_override is None:
+ mode_override = "unspecified"
+ return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch(
+ sample_indices=sample_indices,
+ embedding_indices=embedding_indices,
+ aggregation_weights=aggregation_weights,
+ table_ids=table_ids,
+ device_ordinal=device_ordinal,
+ combiners=combiners,
+ mode_override=mode_override,
+ name=name)
+
+ enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = (
+ gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__)
+
else:
# We have already built the appropriate libraries into the binary via CMake
# if we have built contrib, so we don't need this