about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/ops/string_ops.cc
diff options
context:
space:
mode:
author: Anna R <annarev@google.com> 2018-01-09 13:32:17 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-01-09 13:36:12 -0800
commit: 3e852d462aaba446f62f76007405c0794a6087b9 (patch)
tree: 790dc1747aa319facc98f18450a94015f83a9a89 /tensorflow/core/ops/string_ops.cc
parent: 55cd506ab8220c6a1075965eb7839cac4af1db3e (diff)
Automated g4 rollback of changelist 180691955
PiperOrigin-RevId: 181365803
Diffstat (limited to 'tensorflow/core/ops/string_ops.cc')
-rw-r--r-- tensorflow/core/ops/string_ops.cc 263
1 files changed, 10 insertions, 253 deletions
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index aebd14c7e5..8beb28de0a 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -27,67 +27,20 @@ REGISTER_OP("StringToHashBucketFast")
.Input("input: string")
.Output("output: int64")
.Attr("num_buckets: int >= 1")
- .SetShapeFn(shape_inference::UnchangedShape)
- .Doc(R"doc(
-Converts each string in the input Tensor to its hash mod by a number of buckets.
-
-The hash function is deterministic on the content of the string within the
-process and will never change. However, it is not suitable for cryptography.
-This function may be used when CPU time is scarce and inputs are trusted or
-unimportant. There is a risk of adversaries constructing inputs that all hash
-to the same bucket. To prevent this problem, use a strong hash function with
-`tf.string_to_hash_bucket_strong`.
-
-input: The strings to assign a hash bucket.
-num_buckets: The number of buckets.
-output: A Tensor of the same shape as the input `string_tensor`.
-)doc");
+ .SetShapeFn(shape_inference::UnchangedShape);
REGISTER_OP("StringToHashBucketStrong")
.Input("input: string")
.Output("output: int64")
.Attr("num_buckets: int >= 1")
.Attr("key: list(int)")
- .SetShapeFn(shape_inference::UnchangedShape)
- .Doc(R"doc(
-Converts each string in the input Tensor to its hash mod by a number of buckets.
-
-The hash function is deterministic on the content of the string within the
-process. The hash function is a keyed hash function, where attribute `key`
-defines the key of the hash function. `key` is an array of 2 elements.
-
-A strong hash is important when inputs may be malicious, e.g. URLs with
-additional components. Adversaries could try to make their inputs hash to the
-same bucket for a denial-of-service attack or to skew the results. A strong
-hash prevents this by making it difficult, if not infeasible, to compute inputs
-that hash to the same bucket. This comes at a cost of roughly 4x higher compute
-time than `tf.string_to_hash_bucket_fast`.
-
-input: The strings to assign a hash bucket.
-num_buckets: The number of buckets.
-key: The key for the keyed hash function passed as a list of two uint64
- elements.
-output: A Tensor of the same shape as the input `string_tensor`.
-)doc");
+ .SetShapeFn(shape_inference::UnchangedShape);
REGISTER_OP("StringToHashBucket")
.Input("string_tensor: string")
.Output("output: int64")
.Attr("num_buckets: int >= 1")
- .SetShapeFn(shape_inference::UnchangedShape)
- .Doc(R"doc(
-Converts each string in the input Tensor to its hash mod by a number of buckets.
-
-The hash function is deterministic on the content of the string within the
-process.
-
-Note that the hash function may change from time to time.
-This functionality will be deprecated and it's recommended to use
-`tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.
-
-num_buckets: The number of buckets.
-output: A Tensor of the same shape as the input `string_tensor`.
-)doc");
+ .SetShapeFn(shape_inference::UnchangedShape);
REGISTER_OP("ReduceJoin")
.Input("inputs: string")
@@ -95,41 +48,7 @@ REGISTER_OP("ReduceJoin")
.Attr("keep_dims: bool = false")
.Attr("separator: string = ''")
.Output("output: string")
- .SetShapeFn(shape_inference::ReductionShape)
- .Doc(R"doc(
-Joins a string Tensor across the given dimensions.
-
-Computes the string join across dimensions in the given string Tensor of shape
-`[d_0, d_1, ..., d_n-1]`. Returns a new Tensor created by joining the input
-strings with the given separator (default: empty string). Negative indices are
-counted backwards from the end, with `-1` being equivalent to `n - 1`.
-
-For example:
-
-```python
-# tensor `a` is [["a", "b"], ["c", "d"]]
-tf.reduce_join(a, 0) ==> ["ac", "bd"]
-tf.reduce_join(a, 1) ==> ["ab", "cd"]
-tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"]
-tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"]
-tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]]
-tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]]
-tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"]
-tf.reduce_join(a, [0, 1]) ==> ["acbd"]
-tf.reduce_join(a, [1, 0]) ==> ["abcd"]
-tf.reduce_join(a, []) ==> ["abcd"]
-```
-
-inputs: The input to be joined. All reduced indices must have non-zero size.
-reduction_indices: The dimensions to reduce over. Dimensions are reduced in the
- order specified. Omitting `reduction_indices` is equivalent to passing
- `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported.
-keep_dims: If `True`, retain reduced dimensions with length `1`.
-separator: The separator to use when joining.
-
-output: Has shape equal to that of the input with reduced dimensions removed or
- set to `1` depending on `keep_dims`.
-)doc");
+ .SetShapeFn(shape_inference::ReductionShape);
REGISTER_OP("AsString")
.Input("input: T")
@@ -140,22 +59,7 @@ REGISTER_OP("AsString")
.Attr("shortest: bool = false")
.Attr("width: int = -1")
.Attr("fill: string = ''")
- .SetShapeFn(shape_inference::UnchangedShape)
- .Doc(R"doc(
-Converts each entry in the given tensor to strings. Supports many numeric
-types and boolean.
-
-precision: The post-decimal precision to use for floating point numbers.
- Only used if precision > -1.
-scientific: Use scientific notation for floating point numbers.
-shortest: Use shortest representation (either scientific or standard) for
- floating point numbers.
-width: Pad pre-decimal numbers to this width.
- Applies to both floating point and integer numbers.
- Only used if width > -1.
-fill: The value to pad if width > -1. If empty, pads with spaces.
- Another typical value is '0'. String cannot be longer than 1 character.
-)doc");
+ .SetShapeFn(shape_inference::UnchangedShape);
REGISTER_OP("StringJoin")
.Input("inputs: N * string")
@@ -185,16 +89,7 @@ REGISTER_OP("StringJoin")
}
c->set_output(0, out);
return Status::OK();
- })
- .Doc(R"doc(
-Joins the strings in the given list of string tensors into one tensor;
-with the given separator (default is an empty separator).
-
-inputs: A list of string tensors. The tensors must all have the same shape,
- or be scalars. Scalars may be mixed in; these will be broadcast to the shape
- of non-scalar inputs.
-separator: string, an optional join separator.
-)doc");
+ });
REGISTER_OP("StringSplit")
.Input("input: string")
@@ -212,74 +107,18 @@ REGISTER_OP("StringSplit")
c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
c->set_output(2, c->Vector(2));
return Status::OK();
- })
- .Doc(R"doc(
-Split elements of `input` based on `delimiter` into a `SparseTensor`.
-
-Let N be the size of source (typically N will be the batch size). Split each
-element of `input` based on `delimiter` and return a `SparseTensor`
-containing the splitted tokens. Empty tokens are ignored.
-
-`delimiter` can be empty, or a string of split characters. If `delimiter` is an
- empty string, each element of `input` is split into individual single-byte
- character strings, including splitting of UTF-8 multibyte sequences. Otherwise
- every character of `delimiter` is a potential split point.
-
-For example:
- N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
- will be
-
- indices = [0, 0;
- 0, 1;
- 1, 0;
- 1, 1;
- 1, 2]
- shape = [2, 3]
- values = ['hello', 'world', 'a', 'b', 'c']
-
-input: 1-D. Strings to split.
-delimiter: 0-D. Delimiter characters (bytes), or empty string.
-skip_empty: A `bool`. If `True`, skip the empty strings from the result.
-indices: A dense matrix of int64 representing the indices of the sparse tensor.
-values: A vector of strings corresponding to the splited values.
-shape: a length-2 vector of int64 representing the shape of the sparse
- tensor, where the first value is N and the second value is the maximum number
- of tokens in a single input entry.
-)doc");
+ });
REGISTER_OP("EncodeBase64")
.Input("input: string")
.Output("output: string")
.Attr("pad: bool = false")
- .SetShapeFn(shape_inference::UnchangedShape)
- .Doc(R"doc(
-Encode strings into web-safe base64 format.
-
-Refer to the following article for more information on base64 format:
-en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' at the
-end so that the encoded has length multiple of 4. See Padding section of the
-link above.
-
-Web-safe means that the encoder uses - and _ instead of + and /.
-
-input: Strings to be encoded.
-output: Input strings encoded in base64.
-pad: Bool whether padding is applied at the ends.
-)doc");
+ .SetShapeFn(shape_inference::UnchangedShape);
REGISTER_OP("DecodeBase64")
.Input("input: string")
.Output("output: string")
- .SetShapeFn(shape_inference::UnchangedShape)
- .Doc(R"doc(
-Decode web-safe base64-encoded strings.
-
-Input may or may not have padding at the end. See EncodeBase64 for padding.
-Web-safe means that input must use - and _ instead of + and /.
-
-input: Base64 strings to decode.
-output: Decoded strings.
-)doc");
+ .SetShapeFn(shape_inference::UnchangedShape);
REGISTER_OP("Substr")
.Input("input: string")
@@ -306,88 +145,6 @@ REGISTER_OP("Substr")
// c->input(0) is the ShapeHandle to input strings
// BroadcastBinaryOpShapeFn infers shape from c->input(0) and c->input(1).
return shape_inference::BroadcastBinaryOpShapeFn(c);
- })
- .Doc(R"doc(
-Return substrings from `Tensor` of strings.
-
-For each string in the input `Tensor`, creates a substring starting at index
-`pos` with a total length of `len`.
-
-If `len` defines a substring that would extend beyond the length of the input
-string, then as many characters as possible are used.
-
-If `pos` is negative or specifies a character index larger than any of the input
-strings, then an `InvalidArgumentError` is thrown.
-
-`pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
-Op creation.
-
-*NOTE*: `Substr` supports broadcasting up to two dimensions. More about
-broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-
----
-
-Examples
-
-Using scalar `pos` and `len`:
-
-```python
-input = [b'Hello', b'World']
-position = 1
-length = 3
-
-output = [b'ell', b'orl']
-```
-
-Using `pos` and `len` with same shape as `input`:
-
-```python
-input = [[b'ten', b'eleven', b'twelve'],
- [b'thirteen', b'fourteen', b'fifteen'],
- [b'sixteen', b'seventeen', b'eighteen']]
-position = [[1, 2, 3],
- [1, 2, 3],
- [1, 2, 3]]
-length = [[2, 3, 4],
- [4, 3, 2],
- [5, 5, 5]]
-
-output = [[b'en', b'eve', b'lve'],
- [b'hirt', b'urt', b'te'],
- [b'ixtee', b'vente', b'hteen']]
-```
-
-Broadcasting `pos` and `len` onto `input`:
-
-```
-input = [[b'ten', b'eleven', b'twelve'],
- [b'thirteen', b'fourteen', b'fifteen'],
- [b'sixteen', b'seventeen', b'eighteen'],
- [b'nineteen', b'twenty', b'twentyone']]
-position = [1, 2, 3]
-length = [1, 2, 3]
-
-output = [[b'e', b'ev', b'lve'],
- [b'h', b'ur', b'tee'],
- [b'i', b've', b'hte'],
- [b'i', b'en', b'nty']]
-```
-
-Broadcasting `input` onto `pos` and `len`:
-
-```
-input = b'thirteen'
-position = [1, 5, 7]
-length = [3, 2, 1]
-
-output = [b'hir', b'ee', b'n']
-```
-
-input: Tensor of strings
-pos: Scalar defining the position of first character in each substring
-len: Scalar defining the number of characters to include in each substring
-output: Tensor of substrings
-)doc");
+ });
} // namespace tensorflow