diff options
author | 2016-11-28 10:36:26 -0800 | |
---|---|---|
committer | 2016-11-28 10:45:37 -0800 | |
commit | b25ca9e176bdd1a19a47850fa951bc6d7f5b76d2 (patch) | |
tree | ff7f815c4ff0c4f7284e9f06be9db6939c92efb7 /tensorflow/python/ops/string_ops.py | |
parent | 17fe6574eb7929f92d081a754144747527af2a24 (diff) |
Allow multiple delimiters for string split op.
Change: 140374223
Diffstat (limited to 'tensorflow/python/ops/string_ops.py')
-rw-r--r-- | tensorflow/python/ops/string_ops.py | 14 |
1 file changed, 5 insertions, 9 deletions
```diff
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index c46c24af9a..57e7742355 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -46,8 +46,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
@@ -70,7 +68,8 @@ def string_split(source, delimiter=" "): # pylint: disable=invalid-name
 
   If `delimiter` is an empty string, each element of the `source` is split
   into individual strings, each containing one byte. (This includes splitting
-  multibyte sequences of UTF-8.)
+  multibyte sequences of UTF-8.) If delimiter contains multiple bytes, it is
+  treated as a set of delimiters with each considered a potential split point.
 
   For example:
   N = 2, source[0] is 'hello world' and source[1] is 'a b c', then the output
@@ -89,17 +88,14 @@ def string_split(source, delimiter=" "): # pylint: disable=invalid-name
     delimiter: `0-D` string `Tensor`, the delimiter character, the string should
       be length 0 or 1.
 
+  Raises:
+    ValueError: If delimiter is not a string.
+
   Returns:
     A `SparseTensor` of rank `2`, the strings split according to the delimiter.
     The first column of the indices corresponds to the row in `source` and the
     second column corresponds to the index of the split component in this row.
-
-  Raises:
-    ValueError: If delimiter is not a single-byte character.
   """
-  if isinstance(delimiter, six.string_types) and len(delimiter) > 1:
-    raise ValueError("delimiter must be a single byte-character, got %s" %
-                     delimiter)
   delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string)
   source = ops.convert_to_tensor(source, dtype=dtypes.string)
 
```