Allow multiple delimiters for string split op.

Change: 140374223
author: A. Unique TensorFlower <gardener@tensorflow.org> 2016-11-28 10:36:26 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-11-28 10:45:37 -0800
commit: b25ca9e176bdd1a19a47850fa951bc6d7f5b76d2 (patch)
tree: ff7f815c4ff0c4f7284e9f06be9db6939c92efb7 /tensorflow/python/ops/string_ops.py
parent: 17fe6574eb7929f92d081a754144747527af2a24 (diff)
1 files changed, 5 insertions, 9 deletions
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index c46c24af9a..57e7742355 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -46,8 +46,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
@@ -70,7 +68,8 @@ def string_split(source, delimiter=" "):  # pylint: disable=invalid-name
 
   If `delimiter` is an empty string, each element of the `source` is split
   into individual strings, each containing one byte. (This includes splitting
-  multibyte sequences of UTF-8.)
+  multibyte sequences of UTF-8.) If `delimiter` contains multiple bytes, it is
+  treated as a set of delimiters, each byte being a potential split point.
 
   For example:
   N = 2, source[0] is 'hello world' and source[1] is 'a b c', then the output
@@ -89,17 +88,14 @@ def string_split(source, delimiter=" "):  # pylint: disable=invalid-name
     delimiter: `0-D` string `Tensor`, the delimiter character, the string should
       be length 0 or 1.
 
+  Raises:
+    ValueError: If delimiter is not a string.
+
   Returns:
     A `SparseTensor` of rank `2`, the strings split according to the delimiter.
     The first column of the indices corresponds to the row in `source` and the
     second column corresponds to the index of the split component in this row.
-
-  Raises:
-    ValueError: If delimiter is not a single-byte character.
   """
-  if isinstance(delimiter, six.string_types) and len(delimiter) > 1:
-    raise ValueError("delimiter must be a single byte-character, got %s" %
-                     delimiter)
   delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string)
   source = ops.convert_to_tensor(source, dtype=dtypes.string)
author	A. Unique TensorFlower <gardener@tensorflow.org>	2016-11-28 10:36:26 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-11-28 10:45:37 -0800
commit	b25ca9e176bdd1a19a47850fa951bc6d7f5b76d2 (patch)
tree	ff7f815c4ff0c4f7284e9f06be9db6939c92efb7 /tensorflow/python/ops/string_ops.py
parent	17fe6574eb7929f92d081a754144747527af2a24 (diff)