aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Derek Murray <mrry@google.com>2018-03-07 09:53:52 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-03-07 10:02:45 -0800
commitbe0fa12386c019ffcc65bba5005f3a9e4ad4348c (patch)
tree9f72b5f1a148f3cc67fbd2de36e43a2ca89c9549
parentc6806ae8fcefa6deb701ff06a50a060348bcee90 (diff)
[tf.data] Improve docstring for `tf.data.Dataset.padded_batch()`.
PiperOrigin-RevId: 188190458
-rw-r--r--tensorflow/python/data/ops/dataset_ops.py30
1 files changed, 25 insertions, 5 deletions
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 7c5aa4c767..6539e91c13 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -774,11 +774,31 @@ class Dataset(object):
def padded_batch(self, batch_size, padded_shapes, padding_values=None):
"""Combines consecutive elements of this dataset into padded batches.
- Like `Dataset.dense_to_sparse_batch()`, this method combines
- multiple consecutive elements of this dataset, which might have
- different shapes, into a single element. The tensors in the
- resulting element have an additional outer dimension, and are
- padded to the respective shape in `padded_shapes`.
+ This transformation combines multiple consecutive elements of the input
+ dataset into a single element. Like @{tf.data.Dataset.batch}, the tensors
+ in the resulting element have an additional outer dimension, which will be
+ `batch_size` for all but the last element, and `N % batch_size` for the
+ last element (where `N` is the number of elements in this dataset). Unlike
+ @{tf.data.Dataset.batch}, the elements may have different shapes for some
+ of their components, and this transformation will pad each component to
+ the respective shape in `padded_shapes`. The `padded_shapes` argument
+ determines the resulting shape for each dimension of each component in an
+ output element:
+
+ * If the dimension is a constant (e.g. `tf.Dimension(37)`), the component
+ will be padded out to that length in that dimension.
+ * If the dimension is unknown (e.g. `tf.Dimension(None)`), the component
+ will be padded out to the maximum length of all elements in that
+ dimension.
+
+ NOTE: If the number of elements (`N`) in this dataset is not an exact
+ multiple of `batch_size`, the final batch contains smaller tensors with
+ shape `N % batch_size` in the batch dimension. If your program depends on
+ the batches having the same shape, consider using the
+ @{tf.contrib.data.padded_batch_and_drop_remainder} transformation instead.
+
+ See also @{tf.contrib.data.dense_to_sparse_batch}, which combines elements
+ that may have different shapes into a @{tf.SparseTensor}.
Args:
batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of