From 0bfe054e0e93cf0c0a19f63eb2cfb6b4afd88ef7 Mon Sep 17 00:00:00 2001
From: Benjamin Barenblat
Date: Fri, 3 Jul 2015 15:52:18 -0400
Subject: Initial commit of the regex matcher
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wrap glibc’s regex engine to allow matching and group capture in POSIX
extended regular expressions.

It might be worth rewriting this in terms of the C++11 regex engine; it’s
more featureful and more pleasant to use, although it would require more
casting. (C can’t represent the std::regex type, so I’d need to use some
void pointers.)
---
 src/regex.ur | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 src/regex.ur

(limited to 'src/regex.ur')

diff --git a/src/regex.ur b/src/regex.ur
new file mode 100644
index 0000000..ddc7793
--- /dev/null
+++ b/src/regex.ur
@@ -0,0 +1,44 @@
+(* Copyright 2015 the Massachusetts Institute of Technology
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+this file except in compliance with the License. You may obtain a copy of the
+License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License. *)
+
+structure FFI = Regex__FFI
+
+type t = FFI.regex
+
+(* FFI.compile with its leading flag fixed; see Regex__FFI for its polarity. *)
+val compile = FFI.compile True
+val compile_case_insensitive = FFI.compile False
+
+(* Run [regex] against [input]. Yields None when FFI.succeeded rejects the
+   result, and otherwise Some of the subexpression matches in index order. *)
+fun match regex input =
+  let
+    val result = FFI.do_match regex input
+  in
+    if not (FFI.succeeded result)
+    then None
+    else
+      let
+        (* Ask for the count once, and only after a successful match, rather
+           than again on every iteration of the loop below. *)
+        val n = FFI.n_subexpression_matches result
+        (* The Regex__FFI API can't return a list of matches, so collect them
+           one index at a time. Stopping on [>=] instead of [=] guarantees
+           termination even if the reported count were ever negative. *)
+        fun collect i =
+          if i >= n then []
+          else FFI.subexpression_match result i :: collect (i + 1)
+      in
+        Some (collect 0)
+      end
+  end
-- 
cgit v1.2.3