diff options
author | 2018-03-01 06:03:38 -0800 | |
---|---|---|
committer | 2018-03-01 06:07:59 -0800 | |
commit | 2b7a7ee30666d160929c9aa3e941fbc94c17cc52 (patch) | |
tree | b93b13c47c0c69bb6534a6894e2253ac749174c0 /tensorflow/core/kernels | |
parent | 46355f9065967dd39cd340b17d91a91f70d2c0c1 (diff) |
Add RegexReplace Op that internally calls RE2::Replace.
PiperOrigin-RevId: 187467840
Diffstat (limited to 'tensorflow/core/kernels')
-rw-r--r-- | tensorflow/core/kernels/BUILD | 8 | ||||
-rw-r--r-- | tensorflow/core/kernels/regex_replace_op.cc | 76 |
2 files changed, 84 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 3426cf6e40..feacee5d63 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4155,6 +4155,7 @@ cc_library( ":as_string_op", ":base64_ops", ":reduce_join_op", + ":regex_replace_op", ":string_join_op", ":string_split_op", ":string_to_hash_bucket_op", @@ -4190,6 +4191,12 @@ tf_kernel_library( ) tf_kernel_library( + name = "regex_replace_op", + prefix = "regex_replace_op", + deps = STRING_DEPS + ["@com_googlesource_code_re2//:re2"], +) + +tf_kernel_library( name = "string_split_op", prefix = "string_split_op", deps = STRING_DEPS, @@ -5063,6 +5070,7 @@ filegroup( "scatter_nd_op*", "mutex_ops.*", "batch_kernels.*", + "regex_replace_op.cc", ], ), visibility = ["//visibility:public"], diff --git a/tensorflow/core/kernels/regex_replace_op.cc b/tensorflow/core/kernels/regex_replace_op.cc new file mode 100644 index 0000000000..59ec854a79 --- /dev/null +++ b/tensorflow/core/kernels/regex_replace_op.cc @@ -0,0 +1,76 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <string> + +#include "re2/re2.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class RegexReplaceOp : public OpKernel { + public: + explicit RegexReplaceOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("replace_global", &replace_global_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + const auto& input_flat = input_tensor->flat<string>(); + + const Tensor* pattern_tensor; + OP_REQUIRES_OK(ctx, ctx->input("pattern", &pattern_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(pattern_tensor->shape()), + errors::InvalidArgument("Pattern must be scalar, but received ", + pattern_tensor->shape().DebugString())); + const string pattern = pattern_tensor->flat<string>()(0); + const RE2 match(pattern); + OP_REQUIRES(ctx, match.ok(), + errors::InvalidArgument("Invalid pattern: ", pattern, + ", error: ", match.error())); + + const Tensor* rewrite_tensor; + OP_REQUIRES_OK(ctx, ctx->input("rewrite", &rewrite_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(rewrite_tensor->shape()), + errors::InvalidArgument("Rewrite must be scalar, but received ", + rewrite_tensor->shape().DebugString())); + const string rewrite = rewrite_tensor->flat<string>()(0); + + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output("output", input_tensor->shape(), + &output_tensor)); + auto output_flat = output_tensor->flat<string>(); + for (size_t i = 0; i < input_flat.size(); ++i) { + output_flat(i) = input_flat(i); + if (replace_global_) { + RE2::GlobalReplace(&output_flat(i), match, rewrite); + } else { + RE2::Replace(&output_flat(i), match, rewrite); + } + } + } + + private: + bool replace_global_; +}; + +REGISTER_KERNEL_BUILDER(Name("RegexReplace").Device(DEVICE_CPU), + RegexReplaceOp); + +} // namespace tensorflow |