summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Adam Chlipala <adamc@csail.mit.edu>2019-01-26 13:40:07 -0500
committerGravatar GitHub <noreply@github.com>2019-01-26 13:40:07 -0500
commit0602c8c8b3f0b09a033f393e3d8af8310edff634 (patch)
treec06df163f10fde37e28d77b49204f1475a053c38 /src
parent24fc1cc302a087b2df3d2041b24011a8b65f499e (diff)
parent0844858d23b5a0d695ad719a650e755cc21a235c (diff)
Merge pull request #168 from fabriceleal/fix-codepoint-js
utf8: More portable js, implement urlifyChar, fix urlifyString, fix unit tests
Diffstat (limited to 'src')
-rw-r--r--src/c/urweb.c95
-rw-r--r--src/mono_opt.sml65
2 files changed, 134 insertions, 26 deletions
diff --git a/src/c/urweb.c b/src/c/urweb.c
index c8cfb0c6..58f7884d 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -1954,29 +1954,61 @@ char *uw_Basis_urlifyFloat(uw_context ctx, uw_Basis_float n) {
return r;
}
+/*
+ * Writes the UTF-8 encoding of code point c at *ptr, one ".XX" group
+ * (a dot followed by two uppercase hex digits) per encoded byte, and advances
+ * *ptr past the groups written.
+ *
+ * Between one and four groups are produced, i.e. 3 to 12 characters.  sprintf
+ * also stores a terminating NUL right after the last group; *ptr is left
+ * pointing at that NUL, so the caller must have room for it as well.
+ */
+static void aux_urlifyChar(char** ptr, uw_Basis_char c) {
+  uint8_t bytes[4];
+  int count = 0, i;
+  char *out = *ptr;
+  uint32_t cp = (uint32_t)(c);
+
+  /* Pick the encoding length from the code point's magnitude. */
+  if (cp <= 0x7f) {
+    bytes[count++] = (uint8_t)(c);
+  } else if (cp <= 0x7ff) {
+    bytes[count++] = (uint8_t)(((c)>>6)|0xc0);
+    bytes[count++] = (uint8_t)(((c)&0x3f)|0x80);
+  } else if (cp <= 0xffff) {
+    bytes[count++] = (uint8_t)(((c)>>12)|0xe0);
+    bytes[count++] = (uint8_t)((((c)>>6)&0x3f)|0x80);
+    bytes[count++] = (uint8_t)(((c)&0x3f)|0x80);
+  } else {
+    bytes[count++] = (uint8_t)(((c)>>18)|0xf0);
+    bytes[count++] = (uint8_t)((((c)>>12)&0x3f)|0x80);
+    bytes[count++] = (uint8_t)((((c)>>6)&0x3f)|0x80);
+    bytes[count++] = (uint8_t)(((c)&0x3f)|0x80);
+  }
+
+  /* Emit lead byte first, then the continuation bytes in order. */
+  for (i = 0; i < count; i++) {
+    sprintf(out, ".%02X", bytes[i]);
+    out += 3;
+  }
+
+  *ptr = out;
+}
+
char *uw_Basis_urlifyString(uw_context ctx, uw_Basis_string s) {
char *r, *p;
if (s[0] == '\0')
return "_";
- uw_check_heap(ctx, strlen(s) * 3 + 1 + !!(s[0] == '_'));
+ uw_check_heap(ctx, strlen(s) * 12 + 1 + !!(s[0] == '_'));
r = p = ctx->heap.front;
if (s[0] == '_')
*p++ = '_';
- for (; *s; s++) {
- unsigned char c = *s;
-
- if (c == ' ')
+ uw_Basis_char c;
+ int offset = 0, curr = 0;
+ while (s[offset] != 0) {
+ U8_NEXT(s, offset, -1, c);
+
+ if (U8_IS_SINGLE(s[curr]) && s[curr] == ' ')
*p++ = '+';
- else if (U8_IS_SINGLE(c) && isalnum(c))
- *p++ = c;
+ else if (U8_IS_SINGLE(s[curr]) && isalnum(s[curr]))
+ *p++ = s[curr];
else {
- sprintf(p, ".%02X", c);
- p += 3;
+ aux_urlifyChar(&p, c);
}
+ curr = offset;
}
*p++ = 0;
@@ -2046,6 +2078,29 @@ uw_unit uw_Basis_urlifyTime_w(uw_context ctx, uw_Basis_time t) {
return uw_Basis_urlifyInt_w(ctx, (uw_Basis_int)t.seconds * 1000000 + t.microseconds);
}
+/*
+ * Writes the URL-encoded form of the single character c to the page buffer.
+ *
+ *   - NUL is written as "_".
+ *   - '_' is written as an extra "_" followed by its escaped form.
+ *   - ' ' is written as "+".
+ *   - ASCII letters and digits are written unchanged.
+ *   - Everything else is written by aux_urlifyChar as ".XX" groups, one per
+ *     UTF-8 byte of the code point.
+ */
+uw_unit uw_Basis_urlifyChar_w(uw_context ctx, uw_Basis_char c) {
+  if (c == '\0') {
+    uw_check(ctx, 1);
+    uw_writec_unsafe(ctx, '_');
+    return uw_unit_v;
+  }
+
+  /* Up to 4 UTF-8 bytes * 3 characters each, plus one byte for the NUL that
+     sprintf stores after the last group inside aux_urlifyChar, plus the
+     optional leading '_'.
+     NOTE(review): presumably uw_check reserves exactly the requested number
+     of bytes -- confirm against its definition. */
+  uw_check(ctx, 12 + 1 + !!(c == '_'));
+
+  if (c == '_')
+    uw_writec_unsafe(ctx, '_');
+
+  if (c == ' ')
+    uw_writec_unsafe(ctx, '+');
+  /* Range-test before isalnum: passing a value that is neither EOF nor
+     representable as unsigned char to isalnum is undefined behaviour, and c
+     may be any code point here. */
+  else if ((uint32_t)(c) <= 0x7f && isalnum(c))
+    uw_writec_unsafe(ctx, c);
+  else {
+    aux_urlifyChar(&(ctx->page.front), c);
+  }
+
+  return uw_unit_v;
+}
+
uw_unit uw_Basis_urlifyString_w(uw_context ctx, uw_Basis_string s) {
if (s[0] == '\0') {
uw_check(ctx, 1);
@@ -2053,22 +2108,24 @@ uw_unit uw_Basis_urlifyString_w(uw_context ctx, uw_Basis_string s) {
return uw_unit_v;
}
- uw_check(ctx, strlen(s) * 3 + !!(s[0] == '_'));
+ uw_check(ctx, strlen(s) * 12 + !!(s[0] == '_'));
if (s[0] == '_')
uw_writec_unsafe(ctx, '_');
- for (; *s; s++) {
- unsigned char c = *s;
-
- if (c == ' ')
+ uw_Basis_char c;
+ int offset = 0, curr = 0;
+ while (s[offset] != 0) {
+ U8_NEXT(s, offset, -1, c);
+
+ if (U8_IS_SINGLE(s[curr]) && s[curr] == ' ')
uw_writec_unsafe(ctx, '+');
- else if (U8_IS_SINGLE(c) && isalnum(c))
- uw_writec_unsafe(ctx, c);
- else {
- sprintf(ctx->page.front, ".%02X", c);
- ctx->page.front += 3;
+ else if (U8_IS_SINGLE(s[curr]) && isalnum(s[curr]))
+ uw_writec_unsafe(ctx, s[curr]);
+ else {
+ aux_urlifyChar(&(ctx->page.front), c);
}
+ curr = offset;
}
return uw_unit_v;
diff --git a/src/mono_opt.sml b/src/mono_opt.sml
index 40b865b0..cc85f05b 100644
--- a/src/mono_opt.sml
+++ b/src/mono_opt.sml
@@ -66,9 +66,9 @@ val htmlifyString = String.translate (fn #"<" => "&lt;"
fun htmlifySpecialChar ch = "&#" ^ Int.toString (ord ch) ^ ";"
-fun hexIt ch =
+fun hexPad c =
let
- val s = Int.fmt StringCvt.HEX (ord ch)
+ val s = Int.fmt StringCvt.HEX c
in
case size s of
0 => "00"
@@ -76,6 +76,54 @@ fun hexIt ch =
| _ => s
end
+(* Bitwise helpers on plain ints.  Each one converts its operands to IntInf,
+   applies the IntInf operation, and converts the result back with
+   Int.fromLarge. *)
+
+(* rsh a b: a shifted right by b bits (IntInf.~>>). *)
+fun rsh a b =
+    let
+        val shifted = IntInf.~>> (IntInf.fromInt a, Word.fromInt b)
+    in
+        Int.fromLarge shifted
+    end
+
+(* orb a b: bitwise OR of a and b. *)
+fun orb a b =
+    let
+        val combined = IntInf.orb (IntInf.fromInt a, IntInf.fromInt b)
+    in
+        Int.fromLarge combined
+    end
+
+(* andb a b: bitwise AND of a and b. *)
+fun andb a b =
+    let
+        val masked = IntInf.andb (IntInf.fromInt a, IntInf.fromInt b)
+    in
+        Int.fromLarge masked
+    end
+
+
+(* hexIt ch: the UTF-8 encoding of the code point (ord ch), rendered as
+   two-digit hex bytes separated by ".".
+
+   The result carries no leading "."; the caller prepends the first one (see
+   urlifyCharAux).  Separating the remaining bytes with "." here makes every
+   encoded byte come out as its own ".XX" group, which is what the C-side
+   aux_urlifyChar emits.
+
+   Encoding lengths follow UTF-8: up to 0x7f one byte, up to 0x7ff two bytes,
+   up to 0xffff three bytes, four bytes otherwise. *)
+fun hexIt ch =
+    let
+	val c = ord ch
+
+	(* Continuation byte holding the six bits of c that start at bit `shift`. *)
+	fun cont shift = hexPad (orb (andb (rsh c shift) 0x3f) 0x80)
+    in
+	if c <= 0x7f then
+	    hexPad c
+	else if c <= 0x7ff then
+	    hexPad (orb (rsh c 6) 0xc0)
+	    ^ "." ^ cont 0
+	else if c <= 0xffff then
+	    hexPad (orb (rsh c 12) 0xe0)
+	    ^ "." ^ cont 6
+	    ^ "." ^ cont 0
+	else
+	    hexPad (orb (rsh c 18) 0xf0)
+	    ^ "." ^ cont 12
+	    ^ "." ^ cont 6
+	    ^ "." ^ cont 0
+    end
+
+(* urlifyCharAux ch: URL-encodes one character.
+     space            -> "+"
+     character 0      -> "_"
+     letters / digits -> the character itself
+     anything else    -> "." followed by hexIt ch *)
+fun urlifyCharAux ch =
+    case ch of
+	#" " => "+"
+      | #"\000" => "_"
+      | _ => if Char.isAlphaNum ch then
+		 String.str ch
+	     else
+		 "." ^ hexIt ch
+
+(* urlifyChar c: URL-encodes a single character; an underscore gets an extra
+   "_" in front of its encoded form. *)
+fun urlifyChar c =
+    (if c = #"_" then
+	 "_"
+     else
+	 "")
+    ^ urlifyCharAux c
+
+
fun urlifyString s =
case s of
"" => "_"
@@ -84,11 +132,7 @@ fun urlifyString s =
"_"
else
"")
- ^ String.translate (fn #" " => "+"
- | ch => if Char.isAlphaNum ch then
- str ch
- else
- "." ^ hexIt ch) s
+ ^ String.translate urlifyCharAux s
fun sqlifyInt n = #p_cast (Settings.currentDbms ()) (attrifyInt n, Settings.Int)
@@ -349,6 +393,13 @@ fun exp e =
| EWrite (EFfiApp ("Basis", "urlifyString", [e]), _) =>
EFfiApp ("Basis", "urlifyString_w", [e])
+ | EFfiApp ("Basis", "urlifyChar", [((EPrim (Prim.Char c), _), _)]) =>
+ EPrim (Prim.String (Prim.Normal, urlifyChar c))
+ | EWrite (EFfiApp ("Basis", "urlifyChar", [((EPrim (Prim.Char c), _), _)]), loc) =>
+ EWrite (EPrim (Prim.String (Prim.Normal, urlifyChar c)), loc)
+ | EWrite (EFfiApp ("Basis", "urlifyChar", [e]), _) =>
+ EFfiApp ("Basis", "urlifyChar_w", [e])
+
| EFfiApp ("Basis", "urlifyBool", [((ECon (Enum, PConFfi {con = "True", ...}, NONE), _), _)]) =>
EPrim (Prim.String (Prim.Normal, "1"))
| EFfiApp ("Basis", "urlifyBool", [((ECon (Enum, PConFfi {con = "False", ...}, NONE), _), _)]) =>