aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/ops
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-10-04 11:30:52 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-10-04 11:34:46 -0700
commit: 700c3325311e16be9bb4856cbf944d1871ff35c1 (patch)
tree: 9ae88328889950abaa951a628de7212caec8c026 /tensorflow/core/ops
parent: c8d5054e8c12800f0c3db0e51f3d5902e04eaa37 (diff)
Add "encoding" attribute to string substr op, which controls how each "character" is treated:
* BYTE: Position & length refer to bytes in the string. (Default)
* UTF8: The string is interpreted as UTF-8 encoded Unicode code points, and position & length are treated relative to them.

RELNOTES: Add option to get substring using Unicode characters

PiperOrigin-RevId: 215773373
Diffstat (limited to 'tensorflow/core/ops')
-rw-r--r-- tensorflow/core/ops/string_ops.cc 1
1 files changed, 1 insertions, 0 deletions
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index b4fbde54d9..94d71a4113 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -223,6 +223,7 @@ REGISTER_OP("Substr")
.Input("len: T")
.Output("output: string")
.Attr("T: {int32, int64}")
+ .Attr("unit: {'BYTE', 'UTF8_CHAR'} = 'BYTE'")
.SetShapeFn([](InferenceContext* c) {
ShapeHandle pos_shape = c->input(1);
ShapeHandle len_shape = c->input(2);