diff options
author | Adam Chlipala <adamc@hcoop.net> | 2009-12-03 11:50:51 -0500 |
---|---|---|
committer | Adam Chlipala <adamc@hcoop.net> | 2009-12-03 11:50:51 -0500 |
commit | 2304eece8f5f2b9067cd66d860a332f8721c5321 (patch) | |
tree | a8e6fae2fe503e60c209eeab70b9ff462ab808c8 /src/c/urweb.c | |
parent | e7b50987708729b9c3d0c8c0acb58e801142e6b9 (diff) |
UTF-8 in dynamic escaping
Diffstat (limited to 'src/c/urweb.c')
-rw-r--r-- | src/c/urweb.c | 78 |
1 files changed, 68 insertions, 10 deletions
diff --git a/src/c/urweb.c b/src/c/urweb.c index 344ef2ad..6e2b9e22 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -1410,6 +1410,10 @@ char *uw_Basis_attrifyFloat(uw_context ctx, uw_Basis_float n) { return result; } +static int isCont(unsigned char ch) { + return ch / 64 == 2; +} + char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) { int len = strlen(s); char *result, *p; @@ -1418,7 +1422,7 @@ char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) { result = p = ctx->heap.front; for (; *s; s++) { - char c = *s; + unsigned char c = *s; if (c == '"') { strcpy(p, "&quot;"); @@ -1429,7 +1433,19 @@ char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) { } else if (isprint(c)) *p++ = c; - else { + else if (c / 32 == 6 && isCont(s[1])) { + memcpy(p, s, 2); + p += 2; + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + memcpy(p, s, 3); + p += 3; + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + memcpy(p, s, 4); + p += 4; + s += 3; + } else { int len2; sprintf(p, "&#%d;%n", c, &len2); p += len2; @@ -1499,7 +1515,7 @@ uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) { uw_check(ctx, strlen(s) * 6); for (; *s; s++) { - char c = *s; + unsigned char c = *s; if (c == '"') uw_write_unsafe(ctx, "&quot;"); @@ -1507,7 +1523,22 @@ uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) { uw_write_unsafe(ctx, "&amp;"); else if (isprint(c)) uw_writec_unsafe(ctx, c); - else { + else if (c / 32 == 6 && isCont(s[1])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + uw_writec_unsafe(ctx, s[3]); + s += 3; + } else { uw_write_unsafe(ctx, "&#"); 
uw_Basis_attrifyInt_w_unsafe(ctx, c); uw_writec_unsafe(ctx, ';'); @@ -1847,7 +1878,7 @@ char *uw_Basis_htmlifyString(uw_context ctx, uw_Basis_string s) { uw_check_heap(ctx, strlen(s) * 5 + 1); for (r = s2 = ctx->heap.front; *s; s++) { - char c = *s; + unsigned char c = *s; switch (c) { case '<': @@ -1859,9 +1890,21 @@ char *uw_Basis_htmlifyString(uw_context ctx, uw_Basis_string s) { s2 += 5; break; default: - if (isprint(c)) + if (isprint(c) || isspace(c)) *s2++ = c; - else { + else if (c / 32 == 6 && isCont(s[1])) { + memcpy(s2, s, 2); + s2 += 2; + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + memcpy(s2, s, 3); + s2 += 3; + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + memcpy(s2, s, 4); + s2 += 4; + s += 3; + } else { int len2; sprintf(s2, "&#%d;%n", c, &len2); s2 += len2; @@ -1878,7 +1921,7 @@ uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { uw_check(ctx, strlen(s) * 6); for (; *s; s++) { - char c = *s; + unsigned char c = *s; switch (c) { case '<': @@ -1888,9 +1931,24 @@ uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { uw_write_unsafe(ctx, "&amp;"); break; default: - if (isprint(c)) + if (isprint(c) || isspace(c)) uw_writec_unsafe(ctx, c); - else { + else if (c / 32 == 6 && isCont(s[1])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + uw_writec_unsafe(ctx, s[3]); + s += 3; + } else { uw_write_unsafe(ctx, "&#"); uw_Basis_attrifyInt_w_unsafe(ctx, c); uw_writec_unsafe(ctx, ';'); |