From bc6b0bee8fe4120642029daaa8ce6c069ef667b8 Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Wed, 26 Aug 2015 17:29:32 -0400 Subject: Rework replacement API to rely on transformation Redesign library API around highly general regex-based transformations. Instead of specifying a string to substitute for each match, you now execute an entire function over the match (and over nonmatching regions as well). The resulting C++ code is much simpler, with more functionality pushed into Ur, and the engine now supports certain types of regex transformations needed to mimic Perl. --- src/regex.urs | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 8 deletions(-) (limited to 'src/regex.urs') diff --git a/src/regex.urs b/src/regex.urs index bd7696b..6e7bdcc 100644 --- a/src/regex.urs +++ b/src/regex.urs @@ -16,18 +16,65 @@ specific language governing permissions and limitations under the License. *) This library implements ECMAScript regular expressions. *) +type substring = {Start : int, Len : int} +type match = {Whole : substring, Groups : list substring} + (* Searching *) -(* Matches a regular expression against any part of a string. Returns 'Some -strs', where 'strs' is a list of subexpression matches, if a match succeeds, and -'None' otherwise. *) +(* Matches a regular expression against any part of a string. Returns +'Some match' if a match succeeds and 'None' otherwise. *) val match : string (* needle *) - -> string (* haystack *) - -> option (list string) + -> string (* haystack *) + -> option match + +(* Finds _all_ matches for a regular expression in a string. *) +val all_matches : string (* needle *) + -> string (* haystack *) + -> list match + +(* Replacement *) (* Replaces all substrings in 'haystack' that match 'needle' with the string 'replacement.' 
*) val replace : string (* needle *) - -> string (* replacement *) - -> string (* haystack *) - -> string + -> string (* replacement *) + -> string (* haystack *) + -> string + +(* Transforms a string by applying a function to replace every match in the +string. *) +val transform_matches : string (* needle *) + -> (match -> string) (* transformation *) + -> string (* haystack *) + -> string + +(* Executes a general regex-guided transformation over a string. Matches +'needle' against any part of 'haystack', splitting 'haystack' into matching and +nonmatching regions. Then, runs the provided transformation functions over the +regions and concatenates the results. + +The number of nonmatching regions is always exactly one more than the number of +matching regions. If two matching regions abut or a matching region adjoins the +edge of a string, this function will insert an empty nonmatching region as +appropriate. + +An example may make this a bit clearer: + + let + val haystack = "axbxax" + in + transform "x" + (fn nm => "_" ^ String.substring haystack nm ^ "_") + (fn m => "*" ^ String.substring haystack m.Whole ^ "*") + haystack + end + +evaluates to + + "_a_*x*_b_*x*_a_*x*__" +*) +val transform : string (* needle *) + -> (substring -> string) (* non-matching transformation *) + -> (match -> string) (* matching transformation *) + -> string (* haystack *) + -> string -- cgit v1.2.3