From 655f2bc59e11ccd16229f2e7474b013e585192fb Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Thu, 30 Jul 2015 17:26:16 -0400 Subject: Implement regex substitution --- src/lib.urp | 1 + src/regex.ur | 2 ++ src/regex.urs | 7 ++++++ src/regex__FFI.cc | 64 ++++++++++++++++++++++++++++++++++++++++++------------ src/regex__FFI.h | 4 ++++ src/regex__FFI.js | 24 +++++++++++++------- src/regex__FFI.urs | 7 ++++++ 7 files changed, 87 insertions(+), 22 deletions(-) diff --git a/src/lib.urp b/src/lib.urp index 2dff404..75bcd48 100644 --- a/src/lib.urp +++ b/src/lib.urp @@ -5,6 +5,7 @@ jsFunc Regex__FFI.succeeded=UrWeb.Regex.succeeded jsFunc Regex__FFI.n_subexpression_matches=UrWeb.Regex.nSubexpressionMatches jsFunc Regex__FFI.subexpression_match=UrWeb.Regex.subexpressionMatch jsFunc Regex__FFI.do_match=UrWeb.Regex.doMatch +jsFunc Regex__FFI.replace=UrWeb.Regex.replace file /cgGvSqBi.js regex__FFI.js script /cgGvSqBi.js diff --git a/src/regex.ur b/src/regex.ur index cc039e2..8d099ca 100644 --- a/src/regex.ur +++ b/src/regex.ur @@ -37,3 +37,5 @@ fun match regex input = Some (loop 0) end end + +val replace = FFI.replace diff --git a/src/regex.urs b/src/regex.urs index c3d2c7f..9d2ad25 100644 --- a/src/regex.urs +++ b/src/regex.urs @@ -24,3 +24,10 @@ strs', where 'strs' is a list of subexpression matches, if a match succeeds, and val match : string (* needle *) -> string (* haystack *) -> option (list string) + +(* Replaces all substrings in 'haystack' that match 'needle' with the string +'replacement.' *) +val replace : string (* needle *) + -> string (* haystack *) + -> string (* replacement *) + -> string diff --git a/src/regex__FFI.cc b/src/regex__FFI.cc index 66e25a8..57d4ce8 100644 --- a/src/regex__FFI.cc +++ b/src/regex__FFI.cc @@ -17,6 +17,7 @@ #include #include +#include #include // NOLINT(build/c++11) #include // NOLINT(build/include_order) @@ -59,6 +60,25 @@ Target Number(uw_context* const context, Source arg) { } } +// Compiles a regular expression. 
+std::regex Compile(uw_context* const context, const char needle_string[]) {
+  std::regex needle;
+  try {
+    needle.assign(needle_string, std::regex_constants::ECMAScript);
+  } catch (const std::regex_error& e) {
+    switch (e.code()) {
+      case std::regex_constants::error_space:
+      case std::regex_constants::error_stack:
+        // We ran out of memory.
+        uw_error(context, BOUNDED_RETRY, "regex: compilation failed: %s",
+                 e.what());
+      default:
+        uw_error(context, FATAL, "regex: compilation failed: %s", e.what());
+    }
+  }
+  return needle;
+}
+
 }  // namespace
 
 uw_Basis_bool uw_Regex__FFI_succeeded([[gnu::unused]] uw_context* const context,
@@ -108,20 +128,7 @@ uw_Basis_string uw_Regex__FFI_subexpression_match(
 uw_Regex__FFI_match uw_Regex__FFI_do_match(uw_context* const context,
                                            const uw_Basis_string needle_string,
                                            const uw_Basis_string haystack) {
-  std::regex needle;
-  try {
-    needle.assign(needle_string, std::regex_constants::ECMAScript);
-  } catch (const std::regex_error& e) {
-    switch (e.code()) {
-      case std::regex_constants::error_space:
-      case std::regex_constants::error_stack:
-        // We ran out of memory.
-        uw_error(context, BOUNDED_RETRY, "regex: compilation failed: %s",
-                 e.what());
-      default:
-        uw_error(context, FATAL, "regex: compilation failed: %s", e.what());
-    }
-  }
+  std::regex needle = Compile(context, needle_string);
   uw_Regex__FFI_match result;
   // Make a duplicate of the string to match against, so if it goes out of
   // scope in the calling Ur code, we still have it.
@@ -141,3 +148,32 @@ uw_Regex__FFI_match uw_Regex__FFI_do_match(uw_context* const context,
   std::regex_search(result.haystack, *match_results, needle);
   return result;
 }
+
+uw_Basis_string uw_Regex__FFI_replace(uw_context* const context,
+                                      const uw_Basis_string needle_string,
+                                      const uw_Basis_string haystack,
+                                      const uw_Basis_string replacement) {
+  std::regex needle = Compile(context, needle_string);
+  // Replace every match of 'needle' in 'haystack' with 'replacement'.
+  std::string result;
+  try {
+    result = std::regex_replace(haystack, needle, replacement);
+  } catch (const std::regex_error& e) {
+    switch (e.code()) {
+      case std::regex_constants::error_space:
+      case std::regex_constants::error_stack:
+        // We ran out of memory.
+        uw_error(context, BOUNDED_RETRY, "regex: replacement failed: %s",
+                 e.what());
+      default:
+        uw_error(context, FATAL, "regex: replacement failed: %s", e.what());
+    }
+  }
+  // Save the result string.
+  char* const result_string =
+      reinterpret_cast<char*>(uw_malloc(context, result.length() + 1));
+  Assert(context, std::snprintf(result_string, result.length() + 1, "%s",
+                                result.c_str()) >= 0,
+         "regex: snprintf failed during replace");
+  return result_string;
+}
diff --git a/src/regex__FFI.h b/src/regex__FFI.h
index 695033a..84b81a9 100644
--- a/src/regex__FFI.h
+++ b/src/regex__FFI.h
@@ -44,6 +44,10 @@ uw_Regex__FFI_match uw_Regex__FFI_do_match(struct uw_context*,
                                            const uw_Basis_string,
                                            const uw_Basis_string);
 
+uw_Basis_string uw_Regex__FFI_replace(struct uw_context*, const uw_Basis_string,
+                                      const uw_Basis_string,
+                                      const uw_Basis_string);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/regex__FFI.js b/src/regex__FFI.js
index fcb8927..c7aa880 100644
--- a/src/regex__FFI.js
+++ b/src/regex__FFI.js
@@ -14,6 +14,16 @@
 
 var UrWeb = { Regex: {
 
+_compile: function(needle_string, flags) {
+  var needle;
+  try {
+    needle = new RegExp(needle_string, flags);
+  } catch (e) {
+    er("regex: compilation failed");
+  }
+  return needle;
+},
+
 succeeded: function(match) {
   return !!match;
 },
@@ -29,14 +39,12 @@ subexpressionMatch: function(match, n) {
   return match[n + 1];
 },
 
-doMatch: function(needle_string, haystack) {
-  var needle;
-  try {
-    needle = new RegExp(needle_string);
-  } catch (e) {
-    er("regex: compilation failed");
-  }
-  return haystack.match(needle);
+doMatch: function(needle, haystack) {
+  return haystack.match(UrWeb.Regex._compile(needle));
+},
+
+replace: function(needle, haystack, replacement) {
+  return haystack.replace(UrWeb.Regex._compile(needle, "g"), replacement);
 },
 
 }};  // UrWeb.Regex
diff --git a/src/regex__FFI.urs b/src/regex__FFI.urs
index 862d4f3..f01a27e 100644
--- a/src/regex__FFI.urs
+++ b/src/regex__FFI.urs
@@ -28,3 +28,10 @@ val subexpression_match : match -> int -> string
 val do_match : string (* needle *)
                -> string (* haystack *)
                -> match
+
+(* Replaces all substrings in 'haystack' that match 'needle' with the string
+'replacement.' *)
+val replace : string (* needle *)
+              -> string (* haystack *)
+              -> string (* replacement *)
+              -> string
-- 
cgit v1.2.3