about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/python/ops/string_ops.py
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2016-11-28 10:36:26 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-11-28 10:45:37 -0800
commit: b25ca9e176bdd1a19a47850fa951bc6d7f5b76d2 (patch)
tree: ff7f815c4ff0c4f7284e9f06be9db6939c92efb7 /tensorflow/python/ops/string_ops.py
parent: 17fe6574eb7929f92d081a754144747527af2a24 (diff)
Allow multiple delimiters for string split op.
Change: 140374223
Diffstat (limited to 'tensorflow/python/ops/string_ops.py')
-rw-r--r-- tensorflow/python/ops/string_ops.py | 14
1 files changed, 5 insertions, 9 deletions
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index c46c24af9a..57e7742355 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -46,8 +46,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import six
-
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
@@ -70,7 +68,8 @@ def string_split(source, delimiter=" "): # pylint: disable=invalid-name
If `delimiter` is an empty string, each element of the `source` is split
into individual strings, each containing one byte. (This includes splitting
- multibyte sequences of UTF-8.)
+ multibyte sequences of UTF-8.) If delimiter contains multiple bytes, it is
+ treated as a set of delimiters with each considered a potential split point.
For example:
N = 2, source[0] is 'hello world' and source[1] is 'a b c', then the output
@@ -89,17 +88,14 @@ def string_split(source, delimiter=" "): # pylint: disable=invalid-name
delimiter: `0-D` string `Tensor`, the delimiter character, the string should
be length 0 or 1.
+ Raises:
+ ValueError: If delimiter is not a string.
+
Returns:
A `SparseTensor` of rank `2`, the strings split according to the delimiter.
The first column of the indices corresponds to the row in `source` and the
second column corresponds to the index of the split component in this row.
-
- Raises:
- ValueError: If delimiter is not a single-byte character.
"""
- if isinstance(delimiter, six.string_types) and len(delimiter) > 1:
- raise ValueError("delimiter must be a single byte-character, got %s" %
- delimiter)
delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string)
source = ops.convert_to_tensor(source, dtype=dtypes.string)