diff options
author | Yong Tang <yong.tang.github@outlook.com> | 2018-05-29 21:05:30 +0000 |
---|---|---|
committer | Yong Tang <yong.tang.github@outlook.com> | 2018-05-29 21:07:15 +0000 |
commit | a81adaf865d4ce5f0452db3f619df4fc23c5a327 (patch) | |
tree | 5ec5d230bef006366882c56ec3bfdc6d74a2c666 /tensorflow/python/ops/string_ops.py | |
parent | 003484dc049ac1df55912b53826d473d99819ee1 (diff) |
Update API defs
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Diffstat (limited to 'tensorflow/python/ops/string_ops.py')
-rw-r--r-- | tensorflow/python/ops/string_ops.py | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 961e63d04e..0280c89c10 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -93,6 +93,45 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv @tf_export("strings.split") def string_split_v2(source, sep=None, maxsplit=-1): + """Split elements of `source` based on `sep` into a `SparseTensor`. + + Let N be the size of source (typically N will be the batch size). Split each + element of `source` based on `sep` and return a `SparseTensor` + containing the split tokens. Empty tokens are ignored. + + For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', + then the output will be + + st.indices = [0, 0; + 0, 1; + 1, 0; + 1, 1; + 1, 2] + st.shape = [2, 3] + st.values = ['hello', 'world', 'a', 'b', 'c'] + + If `sep` is given, consecutive delimiters are not grouped together and are + deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and + sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty + string, consecutive whitespace are regarded as a single separator, and the + result will contain no empty strings at the start or end if the string has + leading or trailing whitespace. + + Note that the above mentioned behavior matches python's str.split. + + Args: + source: `1-D` string `Tensor`, the strings to split. + sep: `0-D` string `Tensor`, the delimiter character. + maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result. + + Raises: + ValueError: If sep is not a string. + + Returns: + A `SparseTensor` of rank `2`, the strings split according to the delimiter. + The first column of the indices corresponds to the row in `source` and the + second column corresponds to the index of the split component in this row. + """ if sep is None: sep = '' sep = ops.convert_to_tensor(sep, dtype=dtypes.string) |