diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-10-01 19:56:47 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-10-01 19:59:48 -0700 |
commit | 350388fca9cb9509962ff393a9d21fb2879c9179 (patch) | |
tree | b1613b611d5ed4f657a3bda89702d259c7639ec4 /tensorflow/contrib/tpu | |
parent | 991f06fd50fc73285ce415d57f720994c2b2e861 (diff) |
Add mode_override to the TPU embedding enqueue ops. This allows the mode to be
overridden at runtime, allowing dynamic switching between inference and training
modes. This feature is not fully implemented yet.
PiperOrigin-RevId: 215325071
Diffstat (limited to 'tensorflow/contrib/tpu')
-rw-r--r-- | tensorflow/contrib/tpu/BUILD | 3 | ||||
-rw-r--r-- | tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc | 52 | ||||
-rw-r--r-- | tensorflow/contrib/tpu/python/ops/tpu_ops.py | 148 |
3 files changed, 186 insertions, 17 deletions
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 0c4bdab191..10ed1c2891 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -135,6 +135,9 @@ tf_gen_op_wrapper_py( name = "tpu_ops", hidden = [ "SendTPUEmbeddingGradients", + "EnqueueTPUEmbeddingIntegerBatch", + "EnqueueTPUEmbeddingSparseBatch", + "EnqueueTPUEmbeddingSparseTensorBatch", ], deps = [ ":cross_replica_ops_op_lib", diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc index ef2f8dd36d..0ef29bdf73 100644 --- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc +++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc @@ -335,7 +335,6 @@ void RegisterPerTableLoadAndRetrieveOps() { tpu::GradientAccumulationSupport grad_accum_support; TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support)); if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) { - // TODO(gkurian): Condition this on being used internally within Google. OpRegistry::Global()->Register( [alg](OpRegistrationData* op_reg_data) -> Status { return RegisterPerTableLoadOpsForAlgorithmBody(alg, true, @@ -353,7 +352,6 @@ void RegisterPerTableLoadAndRetrieveOps() { tpu::GradientAccumulationSupport grad_accum_support; TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support)); if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) { - // TODO(gkurian): Condition this on being used internally within Google. 
OpRegistry::Global()->Register( [alg](OpRegistrationData* op_reg_data) -> Status { return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true, @@ -366,7 +364,7 @@ void RegisterPerTableLoadAndRetrieveOps() { } // namespace REGISTER_OP("RecvTPUEmbeddingActivations") - .Output("outputs: num_outputs * float") + .Output("outputs: num_outputs * float32") .Attr("num_outputs: int >= 1") .Attr("config: string") .SetIsStateful() @@ -476,7 +474,8 @@ config: Serialized TPUEmbeddingConfiguration proto. REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch") .Input("batch: N * int32") - .Attr("N: int") + .Input("mode_override: string") + .Attr("N: int >= 1") .Attr("device_ordinal: int = -1") .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape) @@ -485,6 +484,10 @@ An op that enqueues a list of input batch tensors to TPUEmbedding. batch: A list of 1D tensors, one for each embedding table, containing the indices into the tables. +mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', + 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set + in TPUEmbeddingConfiguration is used, otherwise mode_override is used. device_ordinal: The TPU device to use. Should be >= 0 and less than the number of TPU cores in the task on which the node is placed. )doc"); @@ -493,7 +496,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseBatch") .Input("sample_indices: N * int32") .Input("embedding_indices: N * int32") .Input("aggregation_weights: N * float32") - .Attr("N: int") + .Input("mode_override: string") + .Attr("N: int >= 1") .Attr("device_ordinal: int = -1") .Attr("combiners: list(string) = []") .SetIsStateful() @@ -523,14 +527,18 @@ The tensors at corresponding positions in the three input lists must have the same shape, i.e. rank 1 with dim_size() equal to the total number of lookups into the table described by the corresponding table_id. 
-sample_indices: A list of Rank 1 Tensors specifying the training example and +sample_indices: A list of rank 1 Tensors specifying the training example and feature to which the corresponding embedding_indices and aggregation_weights values belong. sample_indices[i] must equal b * nf + f, where nf is the number of features from the corresponding table, f is in [0, nf), and b is in [0, batch size). -embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables. -aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per +embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. +aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per (training example, feature) -- aggregation weights. +mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', + 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set + in TPUEmbeddingConfiguration is used, otherwise mode_override is used. device_ordinal: The TPU device to use. Should be >= 0 and less than the number of TPU cores in the task on which the node is placed. combiners: A list of string scalars, one for each embedding table that specify @@ -545,7 +553,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch") .Input("sample_indices: N * int32") .Input("embedding_indices: N * int32") .Input("aggregation_weights: N * float32") - .Attr("N: int") + .Input("mode_override: string") + .Attr("N: int >= 1") .Attr("device_ordinal: int = -1") .Attr("combiners: list(string) = []") .Attr("table_ids: list(int)") @@ -555,7 +564,7 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch") This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse(). sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond -to ith feature. table_ids[i] indicates which embedding table to look up ith +to the ith feature. 
table_ids[i] indicates which embedding table to look up ith feature. The tensors at corresponding positions in the three input lists (sample_indices, @@ -563,12 +572,18 @@ embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1 with dim_size() equal to the total number of lookups into the table described by the corresponding feature. -sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in +sample_indices: A list of rank 1 Tensors specifying the training example to + which the corresponding embedding_indices and aggregation_weights values + belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse(). +embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. + It corresponds to sp_ids.values in embedding_lookup_sparse(). +aggregation_weights: A list of rank 1 Tensors containing per training example + aggregation weights. It corresponds to sp_weights.values in embedding_lookup_sparse(). -embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values - in embedding_lookup_sparse(). -aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values - in embedding_lookup_sparse(). +mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', + 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set + in TPUEmbeddingConfiguration is used, otherwise mode_override is used. device_ordinal: The TPU device to use. Should be >= 0 and less than the number of TPU cores in the task on which the node is placed. combiners: A list of string scalars, one for each embedding table that specify @@ -577,8 +592,11 @@ combiners: A list of string scalars, one for each embedding table that specify the sum of the weights be 0 for 'mean' or the sum of the squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for all tables. 
-table_ids: A list of int. table_ids[i] indicates which embedding table to look - up ith feature in the list. +table_ids: A list of integers specifying the identifier of the embedding table + (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the + corresponding input. The ith input is looked up using table_ids[i]. The size + of the table_ids list must be equal to that of sample_indices, + embedding_indices and aggregation_weights. )doc"); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py index e2e4acadab..968adccf2b 100644 --- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py +++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py @@ -227,6 +227,154 @@ if platform.system() != "Windows": inputs=inputs, learning_rates=learning_rates, config=config, name=name) + send_tpu_embedding_gradients.__doc__ = ( + gen_tpu_ops._send_tpu_embedding_gradients.__doc__) + + # pylint: disable=protected-access + def enqueue_tpu_embedding_integer_batch(batch, + device_ordinal, + mode_override=None, + name=None): + """A placeholder op for enqueueing embedding IDs to the TPU. + + Args: + batch: A list of 1D tensors, one for each embedding table, containing the + indices into the tables. + device_ordinal: The TPU device to use. Should be >= 0 and less than the + number of TPU cores in the task on which the node is placed. + mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', + 'inference', 'training', 'backward_pass_only'}. When set to + 'unspecified', the mode set in TPUEmbeddingConfiguration is used, + otherwise mode_override is used (optional). + name: A name for the operation (optional). + + Returns: + An EnqueueTPUEmbeddingIntegerBatch operation. 
+ """ + if mode_override is None: + mode_override = "unspecified" + return gen_tpu_ops._enqueue_tpu_embedding_integer_batch( + batch=batch, + device_ordinal=device_ordinal, + mode_override=mode_override, + name=name) + + enqueue_tpu_embedding_integer_batch.__doc__ = ( + gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__) + + # pylint: disable=protected-access + def enqueue_tpu_embedding_sparse_batch(sample_indices, + embedding_indices, + aggregation_weights, + device_ordinal, + combiners=None, + mode_override=None, + name=None): + """A placeholder op for enqueueing embedding IDs to the TPU. + + Args: + sample_indices: A list of rank 1 Tensors specifying the training example + and feature to which the corresponding embedding_indices and + aggregation_weights values belong. sample_indices[i] must equal b * nf + + f, where nf is the number of features from the corresponding table, f is + in [0, nf), and b is in [0, batch size). + embedding_indices: A list of rank 1 Tensors, indices into the embedding + tables. + aggregation_weights: A list of rank 1 Tensors containing per sample -- + i.e. per (training example, feature) -- aggregation weights. + device_ordinal: The TPU device to use. Should be >= 0 and less than the + number of TPU cores in the task on which the node is placed. + combiners: A list of string scalars, one for each embedding table that + specify how to normalize the embedding activations after weighted + summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is + invalid to have the sum of the weights be 0 for 'mean' or the sum of the + squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default + is to use 'sum' for all tables (optional). + mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', + 'inference', 'training', 'backward_pass_only'}. 
When set to + 'unspecified', the mode set in TPUEmbeddingConfiguration is used, + otherwise mode_override is used (optional). + name: A name for the operation (optional). + + Returns: + An EnqueueTPUEmbeddingSparseBatch operation. + """ + if mode_override is None: + mode_override = "unspecified" + return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch( + sample_indices=sample_indices, + embedding_indices=embedding_indices, + aggregation_weights=aggregation_weights, + device_ordinal=device_ordinal, + combiners=combiners, + mode_override=mode_override, + name=name) + + enqueue_tpu_embedding_sparse_batch.__doc__ = ( + gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__) + + # pylint: disable=protected-access + def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices, + embedding_indices, + aggregation_weights, + table_ids, + device_ordinal, + combiners=None, + mode_override=None, + name=None): + """A placeholder op for enqueueing embedding IDs to the TPU. + + Args: + sample_indices: A list of rank 1 Tensors specifying the training example + to which the corresponding embedding_indices and aggregation_weights + values + belong. It corresponds to sp_ids.indices[:,0] in + embedding_lookup_sparse(). + embedding_indices: A list of rank 1 Tensors, indices into the embedding + tables. It corresponds to sp_ids.values in embedding_lookup_sparse(). + aggregation_weights: A list of rank 1 Tensors containing per training + example aggregation weights. It corresponds to sp_weights.values in + embedding_lookup_sparse(). + table_ids: A list of integers specifying the identifier of the embedding + table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to + lookup the corresponding input. The ith input is looked up using + table_ids[i]. The size of the table_ids list must be equal to that of + sample_indices, embedding_indices and aggregation_weights. + device_ordinal: The TPU device to use. 
Should be >= 0 and less than the + number of TPU cores in the task on which the node is placed. + combiners: A list of string scalars, one for each embedding table that + specify how to normalize the embedding activations after weighted + summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is + invalid to have the sum of the weights be 0 for 'mean' or the sum of the + squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default + is to use 'sum' for all tables (optional). + mode_override: A string input that overrides the mode specified in the + TPUEmbeddingConfiguration. Supported values are {'unspecified', + 'inference', 'training', 'backward_pass_only'}. When set to + 'unspecified', the mode set in TPUEmbeddingConfiguration is used, + otherwise mode_override is used (optional). + name: A name for the operation (optional). + + Returns: + An EnqueueTPUEmbeddingSparseTensorBatch operation. + """ + if mode_override is None: + mode_override = "unspecified" + return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch( + sample_indices=sample_indices, + embedding_indices=embedding_indices, + aggregation_weights=aggregation_weights, + table_ids=table_ids, + device_ordinal=device_ordinal, + combiners=combiners, + mode_override=mode_override, + name=name) + + enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = ( + gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__) + else: # We have already built the appropriate libraries into the binary via CMake # if we have built contrib, so we don't need this |