summaryrefslogtreecommitdiff
path: root/server/zstring.c
diff options
context:
space:
mode:
authorGravatar Karl Ramm <kcr@mit.edu>2009-03-01 21:40:08 +0000
committerGravatar Karl Ramm <kcr@mit.edu>2009-03-01 21:40:08 +0000
commit5a4eb277e3f77610b67c13ee05ab4715f018b93b (patch)
treeb2bf68cd963b0af1e497391f812ce4a6299cd530 /server/zstring.c
parent6026937a4c7a2101b99ee7e7ada58fff83a180ce (diff)
asedeno's UTF-8 downcasing patch
Diffstat (limited to 'server/zstring.c')
-rw-r--r--server/zstring.c55
1 files changed, 41 insertions, 14 deletions
diff --git a/server/zstring.c b/server/zstring.c
index 05cd418..3807d18 100644
--- a/server/zstring.c
+++ b/server/zstring.c
@@ -22,6 +22,45 @@ static const char rcsid_zstring_c[] =
static String *zhash[STRING_HASH_TABLE_SIZE];
+/*
+ * Return 1 if the NUL-terminated string s decodes, one code point at a
+ * time, as UTF-8 whose code points all pass utf8proc_codepoint_valid();
+ * return 0 as soon as a decode error or a rejected code point is seen.
+ */
+int valid_utf8_p(const char* s)
+{
+    ssize_t len;    /* utf8proc_iterate's result: bytes consumed, or <0 */
+    int32_t uc;     /* utf8proc_iterate stores through an int32_t pointer;
+                     * a wider variable would be only partly written. */
+
+    while ((len = utf8proc_iterate((const unsigned char *)s, -1, &uc))) {
+        if (len <= 0) return 0; /* Not valid UTF-8 encoding. */
+        if (!(utf8proc_codepoint_valid(uc))) return 0; /* Not valid unicode codepoint. */
+        if (uc == 0) return 1; /* Terminating NUL, we're done. */
+        s += len;
+    }
+    return 0; /* We shouldn't get here. */
+}
+
+/*
+ * Return a case-normalized copy of s.
+ *
+ * If s passes valid_utf8_p(), the copy is produced by utf8proc_map() with
+ * case folding and compatibility composition.  Otherwise, or if
+ * utf8proc_map() reports an error, the copy comes from strsave() and only
+ * its ASCII upper-case letters are lowered.
+ * NOTE(review): the two paths obtain memory from different routines
+ * (utf8proc_map vs. strsave) -- confirm callers release both the same way.
+ */
+static char *zdowncase(const char* s)
+{
+    unsigned char *mapped = NULL;
+    char *new_s, *p;
+
+    if (valid_utf8_p(s)) {
+        /* Use utf8proc if we're dealing with UTF-8.
+         * Rather than downcase, casefold and normalize to NFKC.
+         */
+        if (utf8proc_map((const unsigned char *)s, 0, &mapped,
+                         UTF8PROC_NULLTERM | UTF8PROC_STABLE
+                         | UTF8PROC_CASEFOLD | UTF8PROC_COMPAT
+                         | UTF8PROC_COMPOSE) >= 0 && mapped != NULL)
+            return (char *)mapped;
+        /* A negative result means the mapping failed; rather than hand
+         * back an unusable pointer, fall through to the old method. */
+    }
+
+    /* Not UTF-8 (or mapping failed): fall back to old methods. */
+    new_s = strsave(s);
+    for (p = new_s; *p; p++) {
+        /* Cast keeps the <ctype.h> argument in unsigned char range. */
+        if (isascii((unsigned char)*p) && isupper((unsigned char)*p))
+            *p = tolower((unsigned char)*p);
+    }
+    return new_s;
+}
+
String *
make_string(char *s,
int downcase)
@@ -31,13 +70,7 @@ make_string(char *s,
int i;
if (downcase) {
- new_s = strsave(s);
- p = new_s;
- while(*p) {
- if (isascii(*p) && isupper(*p))
- *p = tolower(*p);
- p++;
- }
+ new_s = zdowncase(s);
} else {
new_s = s;
}
@@ -102,13 +135,7 @@ find_string(char *s,
String *z;
if (downcase) {
- new_s = strsave(s);
- p = new_s;
- while (*p) {
- if (isascii(*p) && isupper(*p))
- *p = tolower(*p);
- p++;
- }
+ new_s = zdowncase(s);
} else {
new_s = s;
}