diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-09-27 10:31:36 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-27 10:36:29 -0700 |
commit | 334244be6864dd1dbec9bc8bb4996cc286a8e3e3 (patch) | |
tree | d379f72fe1d7a68e74d1ee27bed737775c09bfec /tensorflow/python/kernel_tests | |
parent | 3002b10e29363854c6fc20d788bc65233fd5116f (diff) |
Add tf.strings.unicode_script, which detects the script of a Unicode code point
based on standard ranges.
PiperOrigin-RevId: 214796357
Diffstat (limited to 'tensorflow/python/kernel_tests')
-rw-r--r-- | tensorflow/python/kernel_tests/BUILD | 12 | ||||
-rw-r--r-- | tensorflow/python/kernel_tests/unicode_script_op_test.py | 57 |
2 files changed, 69 insertions, 0 deletions
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5183e4d30c..c2e36e5e19 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1097,6 +1097,18 @@ tf_py_test( ], ) +tf_py_test( + name = "unicode_script_op_test", + size = "small", + srcs = ["unicode_script_op_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:string_ops", + ], +) + cuda_py_test( name = "topk_op_test", size = "small", diff --git a/tensorflow/python/kernel_tests/unicode_script_op_test.py b/tensorflow/python/kernel_tests/unicode_script_op_test.py new file mode 100644 index 0000000000..927e5459ed --- /dev/null +++ b/tensorflow/python/kernel_tests/unicode_script_op_test.py @@ -0,0 +1,57 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#=============================================================================== +"""Functional tests for UnicodeScript op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class UnicodeScriptOpTest(test.TestCase): + + def testValidScripts(self): + inputs = [ + ord("a"), + 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x82b8, # CJK UNIFIED IDEOGRAPH-82B8 + ord(",") + ] + with self.cached_session(): + input_vector = constant_op.constant(inputs, dtypes.int32) + outputs = string_ops.unicode_script(input_vector).eval() + self.assertAllEqual( + outputs, + [ + 25, # USCRIPT_LATIN (LATN) + 8, # USCRIPT_CYRILLIC (CYRL) + 17, # USCRIPT_HAN (HANI) + 0 # USCRIPT_COMMON (ZYYY) + ]) + + def testInvalidScript(self): + inputs = [-100, 0xffffff] + with self.cached_session(): + input_vector = constant_op.constant(inputs, dtypes.int32) + outputs = string_ops.unicode_script(input_vector).eval() + self.assertAllEqual(outputs, [-1, -1]) + + +if __name__ == "__main__": + test.main() |