aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/ops/string_ops.py
diff options
context:
space:
mode:
authorGravatar Yong Tang <yong.tang.github@outlook.com>2018-05-29 21:05:30 +0000
committerGravatar Yong Tang <yong.tang.github@outlook.com>2018-05-29 21:07:15 +0000
commita81adaf865d4ce5f0452db3f619df4fc23c5a327 (patch)
tree5ec5d230bef006366882c56ec3bfdc6d74a2c666 /tensorflow/python/ops/string_ops.py
parent003484dc049ac1df55912b53826d473d99819ee1 (diff)
Update API defs
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Diffstat (limited to 'tensorflow/python/ops/string_ops.py')
-rw-r--r--tensorflow/python/ops/string_ops.py39
1 files changed, 39 insertions, 0 deletions
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 961e63d04e..0280c89c10 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -93,6 +93,45 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv
@tf_export("strings.split")
def string_split_v2(source, sep=None, maxsplit=-1):
+ """Split elements of `source` based on `sep` into a `SparseTensor`.
+
+ Let N be the size of source (typically N will be the batch size). Split each
+ element of `source` based on `sep` and return a `SparseTensor`
+ containing the split tokens. Empty tokens are ignored only when `sep` is
+ None or an empty string (see below).
+
+ For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
+ then the output will be
+
+ st.indices = [0, 0;
+ 0, 1;
+ 1, 0;
+ 1, 1;
+ 1, 2]
+ st.shape = [2, 3]
+ st.values = ['hello', 'world', 'a', 'b', 'c']
+
+ If `sep` is given, consecutive delimiters are not grouped together and are
+ deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and
+ sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
+ string, consecutive whitespace is regarded as a single separator, and the
+ result will contain no empty strings at the start or end if the string has
+ leading or trailing whitespace.
+
+ Note that the above mentioned behavior matches python's str.split.
+
+ Args:
+ source: `1-D` string `Tensor`, the strings to split.
+ sep: `0-D` string `Tensor`, the delimiter string (may be more than one
+ character, e.g. `"<>"`).
+ maxsplit: An `int`. If `maxsplit > 0`, the maximum number of splits to
+ perform on each element of `source`.
+
+ Raises:
+ ValueError: If sep is not a string.
+
+ Returns:
+ A `SparseTensor` of rank `2`, the strings split according to the delimiter.
+ The first column of the indices corresponds to the row in `source` and the
+ second column corresponds to the index of the split component in this row.
+ """
if sep is None:
sep = ''
sep = ops.convert_to_tensor(sep, dtype=dtypes.string)