summary | refs | log | tree | commit | diff
path: root/src/c
diff options
context:
space:
mode:
author: Adam Chlipala <adamc@hcoop.net> 2009-12-03 11:50:51 -0500
committer: Adam Chlipala <adamc@hcoop.net> 2009-12-03 11:50:51 -0500
commit: ebff207e853e7dda8f5dc0bf364c6578d86c5c55 (patch)
tree: a8e6fae2fe503e60c209eeab70b9ff462ab808c8 /src/c
parent: b9e79524309c6584f6690262bba65c30b6ad146e (diff)
UTF-8 in dynamic escaping
Diffstat (limited to 'src/c')
-rw-r--r-- src/c/urweb.c 78
1 files changed, 68 insertions, 10 deletions
diff --git a/src/c/urweb.c b/src/c/urweb.c
index 344ef2ad..6e2b9e22 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -1410,6 +1410,10 @@ char *uw_Basis_attrifyFloat(uw_context ctx, uw_Basis_float n) {
return result;
}
+static int isCont(unsigned char ch) { /* Returns nonzero when ch has the UTF-8 continuation-byte form 10xxxxxx. */
+ return ch / 64 == 2; /* ch / 64 keeps only the top two bits; the value 2 (binary 10) means ch lies in 0x80..0xBF */
+}
+
char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) {
int len = strlen(s);
char *result, *p;
@@ -1418,7 +1422,7 @@ char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) {
result = p = ctx->heap.front;
for (; *s; s++) {
- char c = *s;
+ unsigned char c = *s;
if (c == '"') {
strcpy(p, "&quot;");
@@ -1429,7 +1433,19 @@ char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) {
}
else if (isprint(c))
*p++ = c;
- else {
+ else if (c / 32 == 6 && isCont(s[1])) {
+ memcpy(p, s, 2);
+ p += 2;
+ ++s;
+ } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
+ memcpy(p, s, 3);
+ p += 3;
+ s += 2;
+ } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
+ memcpy(p, s, 4);
+ p += 4;
+ s += 3;
+ } else {
int len2;
sprintf(p, "&#%d;%n", c, &len2);
p += len2;
@@ -1499,7 +1515,7 @@ uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) {
uw_check(ctx, strlen(s) * 6);
for (; *s; s++) {
- char c = *s;
+ unsigned char c = *s;
if (c == '"')
uw_write_unsafe(ctx, "&quot;");
@@ -1507,7 +1523,22 @@ uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) {
uw_write_unsafe(ctx, "&amp;");
else if (isprint(c))
uw_writec_unsafe(ctx, c);
- else {
+ else if (c / 32 == 6 && isCont(s[1])) {
+ uw_writec_unsafe(ctx, c);
+ uw_writec_unsafe(ctx, s[1]);
+ ++s;
+ } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
+ uw_writec_unsafe(ctx, c);
+ uw_writec_unsafe(ctx, s[1]);
+ uw_writec_unsafe(ctx, s[2]);
+ s += 2;
+ } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
+ uw_writec_unsafe(ctx, c);
+ uw_writec_unsafe(ctx, s[1]);
+ uw_writec_unsafe(ctx, s[2]);
+ uw_writec_unsafe(ctx, s[3]);
+ s += 3;
+ } else {
uw_write_unsafe(ctx, "&#");
uw_Basis_attrifyInt_w_unsafe(ctx, c);
uw_writec_unsafe(ctx, ';');
@@ -1847,7 +1878,7 @@ char *uw_Basis_htmlifyString(uw_context ctx, uw_Basis_string s) {
uw_check_heap(ctx, strlen(s) * 5 + 1);
for (r = s2 = ctx->heap.front; *s; s++) {
- char c = *s;
+ unsigned char c = *s;
switch (c) {
case '<':
@@ -1859,9 +1890,21 @@ char *uw_Basis_htmlifyString(uw_context ctx, uw_Basis_string s) {
s2 += 5;
break;
default:
- if (isprint(c))
+ if (isprint(c) || isspace(c))
*s2++ = c;
- else {
+ else if (c / 32 == 6 && isCont(s[1])) {
+ memcpy(s2, s, 2);
+ s2 += 2;
+ ++s;
+ } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
+ memcpy(s2, s, 3);
+ s2 += 3;
+ s += 2;
+ } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
+ memcpy(s2, s, 4);
+ s2 += 4;
+ s += 3;
+ } else {
int len2;
sprintf(s2, "&#%d;%n", c, &len2);
s2 += len2;
@@ -1878,7 +1921,7 @@ uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) {
uw_check(ctx, strlen(s) * 6);
for (; *s; s++) {
- char c = *s;
+ unsigned char c = *s;
switch (c) {
case '<':
@@ -1888,9 +1931,24 @@ uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) {
uw_write_unsafe(ctx, "&amp;");
break;
default:
- if (isprint(c))
+ if (isprint(c) || isspace(c))
uw_writec_unsafe(ctx, c);
- else {
+ else if (c / 32 == 6 && isCont(s[1])) {
+ uw_writec_unsafe(ctx, c);
+ uw_writec_unsafe(ctx, s[1]);
+ ++s;
+ } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
+ uw_writec_unsafe(ctx, c);
+ uw_writec_unsafe(ctx, s[1]);
+ uw_writec_unsafe(ctx, s[2]);
+ s += 2;
+ } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
+ uw_writec_unsafe(ctx, c);
+ uw_writec_unsafe(ctx, s[1]);
+ uw_writec_unsafe(ctx, s[2]);
+ uw_writec_unsafe(ctx, s[3]);
+ s += 3;
+ } else {
uw_write_unsafe(ctx, "&#");
uw_Basis_attrifyInt_w_unsafe(ctx, c);
uw_writec_unsafe(ctx, ';');