diff options
author | 2018-05-29 21:05:30 +0000 | |
---|---|---|
committer | 2018-05-29 21:07:15 +0000 | |
commit | a81adaf865d4ce5f0452db3f619df4fc23c5a327 (patch) | |
tree | 5ec5d230bef006366882c56ec3bfdc6d74a2c666 /tensorflow | |
parent | 003484dc049ac1df55912b53826d473d99819ee1 (diff) |
Update API defs
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Diffstat (limited to 'tensorflow')
4 files changed, 95 insertions, 0 deletions
diff --git a/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt new file mode 100644 index 0000000000..6e13d0d049 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt @@ -0,0 +1,48 @@ +op { + graph_op_name: "StringSplitV2" + in_arg { + name: "input" + description: <<END +`1-D` string `Tensor`, the strings to split. +END + } + in_arg { + name: "sep" + description: <<END +`0-D` string `Tensor`, the delimiter character. +END + } + attr { + name: "maxsplit" + description: <<END +An `int`. If `maxsplit > 0`, limit of the split of the result. +END + } + summary: "Split elements of `source` based on `sep` into a `SparseTensor`." + description: <<END +Let N be the size of source (typically N will be the batch size). Split each +element of `source` based on `sep` and return a `SparseTensor` +containing the split tokens. Empty tokens are ignored. + +For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', +then the output will be +``` +st.indices = [0, 0; + 0, 1; + 1, 0; + 1, 1; + 1, 2] +st.shape = [2, 3] +st.values = ['hello', 'world', 'a', 'b', 'c'] +``` + +If `sep` is given, consecutive delimiters are not grouped together and are +deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and +sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty +string, consecutive whitespace are regarded as a single separator, and the +result will contain no empty strings at the start or end if the string has +leading or trailing whitespace. + +Note that the above mentioned behavior matches python's str.split. 
+END +} diff --git a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt new file mode 100644 index 0000000000..0e8576fb01 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StringSplitV2" + visibility: HIDDEN +} diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 961e63d04e..0280c89c10 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -93,6 +93,45 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv @tf_export("strings.split") def string_split_v2(source, sep=None, maxsplit=-1): + """Split elements of `source` based on `sep` into a `SparseTensor`. + + Let N be the size of source (typically N will be the batch size). Split each + element of `source` based on `sep` and return a `SparseTensor` + containing the split tokens. Empty tokens are ignored. + + For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', + then the output will be + + st.indices = [0, 0; + 0, 1; + 1, 0; + 1, 1; + 1, 2] + st.shape = [2, 3] + st.values = ['hello', 'world', 'a', 'b', 'c'] + + If `sep` is given, consecutive delimiters are not grouped together and are + deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and + sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty + string, consecutive whitespace are regarded as a single separator, and the + result will contain no empty strings at the start or end if the string has + leading or trailing whitespace. + + Note that the above mentioned behavior matches python's str.split. + + Args: + source: `1-D` string `Tensor`, the strings to split. + sep: `0-D` string `Tensor`, the delimiter character. + maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result. + + Raises: + ValueError: If sep is not a string. 
+ + Returns: + A `SparseTensor` of rank `2`, the strings split according to the delimiter. + The first column of the indices corresponds to the row in `source` and the + second column corresponds to the index of the split component in this row. + """ if sep is None: sep = '' sep = ops.convert_to_tensor(sep, dtype=dtypes.string) diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt index a3fbe95bba..b641c39feb 100644 --- a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt @@ -4,4 +4,8 @@ tf_module { name: "regex_full_match" argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "split" + argspec: "args=[\'source\', \'sep\', \'maxsplit\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], " + } } |