summaryrefslogtreecommitdiff
path: root/src/regex__FFI.urs
diff options
context:
space:
mode:
author: Benjamin Barenblat <bbaren@mit.edu>, 2015-08-26 17:29:32 -0400
committer: Benjamin Barenblat <bbaren@mit.edu>, 2015-08-26 17:29:32 -0400
commit: bc6b0bee8fe4120642029daaa8ce6c069ef667b8 (patch)
tree: fab27db4556318e4f6a4b7c60e6b47c7ab58e59a /src/regex__FFI.urs
parent: 75ff1a7a1979466a77dcc3acbbb88e897213027f (diff)
Rework replacement API to rely on transformation
Redesign library API around highly general regex-based transformations. Instead of specifying a string to substitute for each match, you now execute an entire function over the match (and over nonmatching regions as well). The resulting C++ code is much simpler, with more functionality pushed into Ur, and the engine now supports certain types of regex transformations needed to mimic Perl.
Diffstat (limited to 'src/regex__FFI.urs')
-rw-r--r-- src/regex__FFI.urs 36
1 file changed, 21 insertions, 15 deletions
diff --git a/src/regex__FFI.urs b/src/regex__FFI.urs
index 0f10052..02e3880 100644
--- a/src/regex__FFI.urs
+++ b/src/regex__FFI.urs
@@ -15,23 +15,29 @@ specific language governing permissions and limitations under the License. *)
(* This is an internal module. You should use the high-level API in Regex
instead. *)
+(* Ideally, these types would be declared in a nice module hierarchy.
+Unfortunately, Ur/Web bug #207 makes that impossible. *)
+type substring_t
+val substring_start : substring_t -> int
+val substring_length : substring_t -> int
-(* Data about a match. There is no function which returns all subexpression
-matches, as we can't build an Ur list in C. *)
-type match
-val succeeded : match -> bool
-val n_subexpression_matches : match -> int
-val subexpression_match : match -> int -> string
+type substring_list_t
+val substring_list_length : substring_list_t -> int
+val substring_list_get : substring_list_t -> int -> substring_t
+(* Matches a regular expression against any part of a string. Returns the list
+of groups for the leftmost match; element zero is the match as a whole.
+Thus, matching /a(b*c)d/ against
-(* Matches a regular expression against any part of a string. *)
+              1    1
+    0    5    0    5
+    __acd__abbbbcd__
+
+will yield
+
+ [(2,3), (3, 1)]
+
+where (x,y) is a substring with start x and length y. *)
val do_match : string (* needle *)
-> string (* haystack *)
- -> match
-
-(* Replaces all substrings in 'haystack' that match 'needle' with the string
-'replacement.' *)
-val replace : string (* needle *)
- -> string (* replacement *)
- -> string (* haystack *)
- -> string
+ -> substring_list_t