aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar Yong Tang <yong.tang.github@outlook.com>2018-05-29 21:05:30 +0000
committerGravatar Yong Tang <yong.tang.github@outlook.com>2018-05-29 21:07:15 +0000
commita81adaf865d4ce5f0452db3f619df4fc23c5a327 (patch)
tree5ec5d230bef006366882c56ec3bfdc6d74a2c666 /tensorflow
parent003484dc049ac1df55912b53826d473d99819ee1 (diff)
Update API defs
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt48
-rw-r--r--tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt4
-rw-r--r--tensorflow/python/ops/string_ops.py39
-rw-r--r--tensorflow/tools/api/golden/tensorflow.strings.pbtxt4
4 files changed, 95 insertions, 0 deletions
diff --git a/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt
new file mode 100644
index 0000000000..6e13d0d049
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt
@@ -0,0 +1,48 @@
+op {
+ graph_op_name: "StringSplitV2"
+ in_arg {
+ name: "input"
+ description: <<END
+`1-D` string `Tensor`, the strings to split.
+END
+ }
+ in_arg {
+ name: "sep"
+ description: <<END
+`0-D` string `Tensor`, the delimiter string.
+END
+ }
+ attr {
+ name: "maxsplit"
+ description: <<END
+An `int`. If `maxsplit > 0`, the maximum number of splits to perform.
+END
+ }
+ summary: "Split elements of `source` based on `sep` into a `SparseTensor`."
+ description: <<END
+Let N be the size of source (typically N will be the batch size). Split each
+element of `source` based on `sep` and return a `SparseTensor`
+containing the split tokens. Empty tokens are ignored.
+
+For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
+then the output will be
+```
+st.indices = [0, 0;
+ 0, 1;
+ 1, 0;
+ 1, 1;
+ 1, 2]
+st.shape = [2, 3]
+st.values = ['hello', 'world', 'a', 'b', 'c']
+```
+
+If `sep` is given, consecutive delimiters are not grouped together and are
+deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and
+sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
+string, consecutive whitespace is regarded as a single separator, and the
+result will contain no empty strings at the start or end if the string has
+leading or trailing whitespace.
+
+Note that the above mentioned behavior matches Python's `str.split`.
+END
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt
new file mode 100644
index 0000000000..0e8576fb01
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt
@@ -0,0 +1,4 @@
+op {
+ graph_op_name: "StringSplitV2"
+ visibility: HIDDEN
+}
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 961e63d04e..0280c89c10 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -93,6 +93,45 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv
@tf_export("strings.split")
def string_split_v2(source, sep=None, maxsplit=-1):
+ """Split elements of `source` based on `sep` into a `SparseTensor`.
+
+ Let N be the size of source (typically N will be the batch size). Split each
+ element of `source` based on `sep` and return a `SparseTensor`
+ containing the split tokens. Empty tokens are ignored.
+
+ For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
+ then the output will be
+
+ st.indices = [0, 0;
+ 0, 1;
+ 1, 0;
+ 1, 1;
+ 1, 2]
+ st.shape = [2, 3]
+ st.values = ['hello', 'world', 'a', 'b', 'c']
+
+ If `sep` is given, consecutive delimiters are not grouped together and are
+ deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and
+ sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
+ string, consecutive whitespace is regarded as a single separator, and the
+ result will contain no empty strings at the start or end if the string has
+ leading or trailing whitespace.
+
+ Note that the above mentioned behavior matches Python's `str.split`.
+
+ Args:
+ source: `1-D` string `Tensor`, the strings to split.
+ sep: `0-D` string `Tensor`, the delimiter string.
+ maxsplit: An `int`. If `maxsplit > 0`, the maximum number of splits to perform.
+
+ Raises:
+ ValueError: If sep is not a string.
+
+ Returns:
+ A `SparseTensor` of rank `2`, the strings split according to the delimiter.
+ The first column of the indices corresponds to the row in `source` and the
+ second column corresponds to the index of the split component in this row.
+ """
if sep is None:
sep = ''
sep = ops.convert_to_tensor(sep, dtype=dtypes.string)
diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
index a3fbe95bba..b641c39feb 100644
--- a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
@@ -4,4 +4,8 @@ tf_module {
name: "regex_full_match"
argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
+ member_method {
+ name: "split"
+ argspec: "args=[\'source\', \'sep\', \'maxsplit\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], "
+ }
}