summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Benjamin Barenblat <bbaren@mit.edu> 2015-07-27 17:03:43 -0400
committer Benjamin Barenblat <bbaren@mit.edu> 2015-07-27 17:03:43 -0400
commit a8b108619083c2088269ea8071c7958f277ed41f (patch)
tree 2bd68e215161d64e34cce66863ab8c0ebd1d44d2
parent a6c2ac566bd749c14a29f7d8d9d2d4898b1d49de (diff)
Rework to use the C++11 regex library
Switch to using the C++11 regex library for better portability and ease of use. As an added bonus, this should make it easier to implement regex substitution.
-rw-r--r-- Makefile.am 3
-rw-r--r-- configure.ac 4
-rw-r--r-- src/regex__FFI.cc 223
-rw-r--r-- src/regex__FFI.h 9
4 files changed, 106 insertions, 133 deletions
diff --git a/Makefile.am b/Makefile.am
index b3926a2..3cc46e9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -29,7 +29,8 @@ CXXFLAGS = \
CXXFLAGS += \
-Weverything \
-Wno-c++98-compat \
- -Wno-padded
+ -Wno-padded \
+ -Wno-switch-enum
LDFLAGS = \
-Wl,-Bsymbolic-functions \
diff --git a/configure.ac b/configure.ac
index a9c1c74..9140509 100644
--- a/configure.ac
+++ b/configure.ac
@@ -24,10 +24,6 @@ AC_CONFIG_HEADERS([config.h])
AC_PROG_CXX([clang++])
AX_CXX_COMPILE_STDCXX_11([noext], [mandatory])
-AC_CHECK_HEADERS([regex.h],
- [],
- [AC_MSG_FAILURE([a POSIX-compatible regex.h is required])])
-
AC_CHECK_HEADERS([urweb/urweb_cpp.h],
[],
[AC_MSG_FAILURE([Ur/Web headers are required])])
diff --git a/src/regex__FFI.cc b/src/regex__FFI.cc
index 403171f..0ea3455 100644
--- a/src/regex__FFI.cc
+++ b/src/regex__FFI.cc
@@ -14,10 +14,11 @@
#include "regex__FFI.h"
-#include <sys/types.h>
#include <regex.h>
#include <cstring>
+#include <limits>
+#include <regex>
extern "C" {
#include <urweb/urweb_cpp.h>
@@ -27,9 +28,6 @@ extern "C" {
namespace {
-using Regex = uw_Regex__FFI_regex;
-using Match = uw_Regex__FFI_match;
-
// Asserts a condition without crashing or releasing information about where the
// error occurred. This function is essential for web programming, where an
// attacker should not be able to bring down the app by causing an assertion
@@ -46,147 +44,130 @@ void Assert(uw_context* const context,
Assert(context, condition, FATAL, message);
}
-void FinalizeRegex(void* regex, [[gnu::unused]] const int _will_retry) {
- regfree(reinterpret_cast<regex_t*>(regex));
+void DeleteRegex(void* regex, [[gnu::unused]] const int _will_retry) {
+ delete reinterpret_cast<std::regex*>(regex);
}
-void DeleteRegex(void* regex, [[gnu::unused]] const int _will_retry) {
- delete reinterpret_cast<regex_t*>(regex);
+void DeleteMatchResults(void* match_result,
+ [[gnu::unused]] const int _will_retry) {
+ delete reinterpret_cast<std::cmatch*>(match_result);
+}
+
+// Bounds-checked numeric type conversion
+template<typename T, typename U>
+U Number(uw_context* const context, const T input) {
+ Assert(context, input <= std::numeric_limits<U>::max(),
+ "regex: detected overflow during numeric conversion");
+ if (std::numeric_limits<T>::is_signed == std::numeric_limits<U>::is_signed) {
+ Assert(context, std::numeric_limits<U>::lowest() <= input,
+ "regex: detected underflow during numeric conversion");
+ } else if (std::numeric_limits<T>::is_signed) {
+ Assert(context, 0 <= input,
+ "regex: detected underflow during numeric conversion");
+ }
+ return static_cast<U>(input);
}
} // namespace
-uw_Basis_bool uw_Regex__FFI_succeeded(
- [[gnu::unused]] struct uw_context* _context,
- const Match match) {
- return match.succeeded ? uw_Basis_True : uw_Basis_False;
+uw_Basis_bool uw_Regex__FFI_succeeded([[gnu::unused]] uw_context* const context,
+ const uw_Regex__FFI_match match) {
+ if (reinterpret_cast<std::cmatch*>(match.result)->empty()) {
+ return uw_Basis_False;
+ } else {
+ return uw_Basis_True;
+ }
}
uw_Basis_int uw_Regex__FFI_n_subexpression_matches(
- [[gnu::unused]] struct uw_context* _context,
- const Match match) {
- return match.n_matches;
+ uw_context* const context,
+ const uw_Regex__FFI_match match) {
+ const std::cmatch::size_type n_matches =
+ reinterpret_cast<std::cmatch*>(match.result)->size();
+ if (n_matches == 0) {
+ // Nothing got matched.
+ return 0;
+ } else {
+ // At least one match occurred. Compute the number of parenthesized
+ // subexpressions that got matched, and return it.
+ return Number<std::cmatch::size_type, uw_Basis_int>(context, n_matches) - 1;
+ }
}
uw_Basis_string uw_Regex__FFI_subexpression_match(
- struct uw_context* context,
- const Match match,
- const uw_Basis_int match_index) {
- Assert(context, match.matches[match_index].rm_so != -1,
+ uw_context* const context,
+ const uw_Regex__FFI_match match,
+ const uw_Basis_int match_index_signed) {
+ const std::cmatch* const match_result =
+ reinterpret_cast<std::cmatch*>(match.result);
+ const std::size_t match_index =
+ Number<uw_Basis_int, std::size_t>(context, match_index_signed);
+ Assert(context, match_index < match_result->size(),
"regex: match does not exist");
- // Locate the substring in the string to match aginst.
- const char* const substring_start =
- match.haystack + match.matches[match_index].rm_so;
- // Copy it into its own buffer so we can properly null-terminate it.
- const std::size_t substring_length =
- static_cast<std::size_t>(match.matches[match_index].rm_eo
- - match.matches[match_index].rm_so);
- uw_Basis_string result = reinterpret_cast<uw_Basis_string>(
- uw_malloc(context, substring_length + 1));
- std::memcpy(result, substring_start, substring_length);
- result[substring_length] = '\0';
+ const auto matched_substring = (*match_result)[match_index + 1];
+ // Save the matched substring.
+ const std::size_t result_length =
+ Number<std::csub_match::difference_type, std::size_t>(
+ context,
+ matched_substring.length());
+ uw_Basis_string result =
+ reinterpret_cast<uw_Basis_string>(
+ uw_malloc(context, result_length + 1));
+ std::strcpy(result, matched_substring.str().c_str());
return result;
}
-Regex uw_Regex__FFI_compile(uw_context* const context,
- const uw_Basis_bool case_sensitive,
- const uw_Basis_string input) {
- Regex result;
- result.text = input;
- // We'd like to stack-allocate the compiled field of the Regex struct--or, at
- // least, to allocate it with uw_malloc. Unfortunately, neither of those will
- // work, because we need to be able to run a finalizer on it, and Ur
- // finalizers can only reference addresses that are not managed by Ur.
- result.compiled = new regex_t;
+uw_Regex__FFI_regex uw_Regex__FFI_compile(uw_context* const context,
+ const uw_Basis_bool case_sensitive,
+ const uw_Basis_string input) {
+ // We'd like to stack-allocate the result--or, at least, to allocate it with
+ // uw_malloc. Unfortunately, neither of those will work, because we need to
+ // run a finalizer on it, and Ur finalizers can only reference addresses that
+ // are not managed by Ur.
+ auto* result = new std::regex;
Assert(context,
- uw_register_transactional(context, result.compiled,
+ uw_register_transactional(context, result,
nullptr, nullptr, DeleteRegex) == 0,
"regex: could not register DeleteRegex finalizer");
- // Compile the regex.
- const auto flags = REG_EXTENDED | (case_sensitive ? 0 : REG_ICASE);
- switch (const auto regcomp_error = regcomp(result.compiled, input, flags)) {
- case 0:
- // Everything worked perfectly.
- break;
- case REG_ESPACE:
- // We ran out of memory.
- uw_error(context, BOUNDED_RETRY, "regex: could not allocate");
- default:
- // Something else happened. Generate a nice message for the user.
- const auto message_size =
- regerror(regcomp_error, result.compiled, nullptr, 0);
- char* const message =
- reinterpret_cast<char*>(uw_malloc(context, message_size));
- Assert(context,
- regerror(regcomp_error, result.compiled, message,
- message_size) == message_size,
- "regex: compilation failed, but error message could not be"
- " generated");
- uw_error(context, FATAL, "regex: compilation failed: %s", message);
+ auto flags = std::regex_constants::extended;
+ if (!case_sensitive) {
+ flags |= std::regex_constants::icase;
+ }
+ try {
+ result->assign(input, flags);
+ } catch (const std::regex_error& e) {
+ switch (e.code()) {
+ case std::regex_constants::error_space:
+ case std::regex_constants::error_stack:
+ // We ran out of memory.
+ uw_error(context, BOUNDED_RETRY,
+ "regex: compilation failed: %s", e.what());
+ default:
+ uw_error(context, FATAL,
+ "regex: compilation failed: %s", e.what());
+ }
}
- Assert(context,
- uw_register_transactional(context, result.compiled,
- nullptr, nullptr, FinalizeRegex) == 0,
- "regex: could not register FinalizeRegex finalizer");
- // Give the caller the regex.
return result;
}
-Match uw_Regex__FFI_do_match(uw_context* const context, const Regex needle,
- const uw_Basis_string haystack) {
- Match result;
- // Make a duplicate of the string to match against, so if it goes out of scope
- // in the calling Ur code, we still have it. TODO(bbaren): Is this necessary?
+uw_Regex__FFI_match uw_Regex__FFI_do_match(uw_context* const context,
+ const uw_Regex__FFI_regex needle,
+ const uw_Basis_string haystack) {
+ uw_Regex__FFI_match result;
+ // Make a duplicate of the string to match against, so if it goes out of
+ // scope in the calling Ur code, we still have it.
result.haystack =
- reinterpret_cast<uw_Basis_string>(
- uw_malloc(context, std::strlen(haystack)));
+ reinterpret_cast<char*>(uw_malloc(context, std::strlen(haystack)));
std::strcpy(result.haystack, haystack);
- // Figure out how many groups we could have so we can allocate enough space to
- // store the match information.
- result.n_matches = 0;
- for (std::size_t i = 0; i < std::strlen(needle.text); i++) {
- switch (needle.text[i]) {
- case '\\':
- // The next character is escaped, so it can't possibly be the
- // metacharacter '('. Skip it.
- i++;
- break;
- case '(':
- // That's our metacharacter.
- result.n_matches++;
- break;
- default:
- // Nothing interesting.
- break;
- }
- }
- // Allocate to store the match information. Allocate one more slot than we
- // need, because the regex engine puts information about the entire match in
- // the first slot.
- result.matches =
- reinterpret_cast<regmatch_t*>(
- uw_malloc(context, (result.n_matches + 1) * sizeof(regmatch_t)));
- // Execute the regex.
- switch (regexec(needle.compiled, haystack, result.n_matches + 1,
- result.matches, 0)) {
- case 0:
- // A match occurred.
- result.succeeded = 1;
- // Bump the matches array to skip information about the entire match.
- result.matches++;
- break;
- case REG_NOMATCH:
- // No match occurred.
- result.succeeded = 0;
- result.n_matches = 0;
- result.matches = nullptr;
- break;
- case REG_ESPACE:
- // We ran out of memory.
- uw_error(context, BOUNDED_RETRY, "regex: could not allocate");
- default:
- // Some unknown error occurred.
- uw_error(context, FATAL, "regex: could not execute regular expression");
- }
+ // Allocate to store the match information.
+ auto* match_results = new std::cmatch;
+ Assert(context,
+ uw_register_transactional(context, match_results,
+ nullptr, nullptr, DeleteMatchResults) == 0,
+ "regex: could not register DeleteMatchResults finalizer");
+ result.result = match_results;
+ // Execute the regex on the saved haystack, not the original one.
+ std::regex_search(result.haystack, *match_results,
+ *reinterpret_cast<std::regex*>(needle));
return result;
}
diff --git a/src/regex__FFI.h b/src/regex__FFI.h
index 6fa9482..9d6a2c2 100644
--- a/src/regex__FFI.h
+++ b/src/regex__FFI.h
@@ -22,16 +22,11 @@ extern "C" {
#include <urweb/urweb_cpp.h>
-typedef struct {
- char* text;
- void* compiled;
-} uw_Regex__FFI_regex;
+typedef void* uw_Regex__FFI_regex;
typedef struct {
char* haystack;
- int succeeded;
- unsigned n_matches;
- regmatch_t* matches;
+ void* result;
} uw_Regex__FFI_match;
uw_Basis_bool uw_Regex__FFI_succeeded(struct uw_context*,