From b4d31722f1ee192d91717e40a3c1bed281ed9392 Mon Sep 17 00:00:00 2001 From: fab Date: Wed, 9 Jan 2019 22:21:14 +0000 Subject: fix unit tests. implement urlifyChar --- src/c/urweb.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------ src/mono_opt.sml | 67 ++++++++++++++++++++++++++++++++++----- 2 files changed, 135 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/c/urweb.c b/src/c/urweb.c index ae2fc0a8..e98b5772 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -1954,29 +1954,61 @@ char *uw_Basis_urlifyFloat(uw_context ctx, uw_Basis_float n) { return r; } +static void aux_urlifyChar(char** ptr, uw_Basis_char c) { + char* p = *ptr; + + if((uint32_t)(c) <= 0x7f) { + sprintf(p, ".%02X", (uint8_t)(c)); + p += 3; + } else { + if((uint32_t)(c) <= 0x7ff) { + sprintf(p, ".%02X", (uint8_t)(((c)>>6)|0xc0)); + p += 3; + } else { + if((uint32_t)(c) <= 0xffff) { + sprintf(p, ".%02X", (uint8_t)(((c)>>12)|0xe0)); + p += 3; + } else { + sprintf(p, ".%02X", (uint8_t)(((c)>>18)|0xf0)); + p += 3; + sprintf(p, ".%02X", (uint8_t)((((c)>>12)&0x3f)|0x80)); + p += 3; + } + sprintf(p, ".%02X", (uint8_t)((((c)>>6)&0x3f)|0x80)); + p += 3; + } + sprintf(p, ".%02X", (uint8_t)(((c)&0x3f)|0x80)); + p += 3; + } + + *ptr = p; +} + char *uw_Basis_urlifyString(uw_context ctx, uw_Basis_string s) { char *r, *p; if (s[0] == '\0') return "_"; - uw_check_heap(ctx, strlen(s) * 3 + 1 + !!(s[0] == '_')); + uw_check_heap(ctx, strlen(s) * 12 + 1 + !!(s[0] == '_')); r = p = ctx->heap.front; if (s[0] == '_') *p++ = '_'; - for (; *s; s++) { - unsigned char c = *s; - - if (c == ' ') + uw_Basis_char c; + int offset = 0, curr = 0; + while (s[offset] != 0) { + U8_NEXT(s, offset, -1, c); + + if (U8_IS_SINGLE(s[curr]) && s[curr] == ' ') *p++ = '+'; - else if (U8_IS_SINGLE(c) && isalnum(c)) - *p++ = c; + else if (U8_IS_SINGLE(s[curr]) && isalnum(s[curr])) + *p++ = s[offset]; else { - sprintf(p, ".%02X", c); - p += 3; + aux_urlifyChar(&p, c); } + curr = offset; } *p++ = 0; @@ -2046,6 +2078,29 @@ uw_unit uw_Basis_urlifyTime_w(uw_context ctx, uw_Basis_time t) { return uw_Basis_urlifyInt_w(ctx, (uw_Basis_int)t.seconds * 1000000 + t.microseconds); } +uw_unit uw_Basis_urlifyChar_w(uw_context ctx, uw_Basis_char c) { + if (c == '\0') { + uw_check(ctx, 1); + uw_writec_unsafe(ctx, '_'); + return uw_unit_v; + } + + uw_check(ctx, 12 + !!(c == '_')); + + if (c == '_') + uw_writec_unsafe(ctx, '_'); + + if (c == ' ') + uw_writec_unsafe(ctx, '+'); + else if (isalnum(c) && c <= 0x7f) + uw_writec_unsafe(ctx, c); + else { + aux_urlifyChar(&(ctx->page.front), c); + } + + return uw_unit_v; +} + uw_unit uw_Basis_urlifyString_w(uw_context ctx, uw_Basis_string s) { if (s[0] == '\0') { uw_check(ctx, 1); @@ -2053,22 +2108,24 @@ uw_unit uw_Basis_urlifyString_w(uw_context ctx, uw_Basis_string s) { return uw_unit_v; } - uw_check(ctx, strlen(s) * 3 + !!(s[0] == '_')); + uw_check(ctx, strlen(s) * 12 + !!(s[0] == '_')); if (s[0] == '_') uw_writec_unsafe(ctx, '_'); - for (; *s; s++) { - unsigned char c = *s; - - if (c == ' ') + uw_Basis_char c; + int offset = 0, curr = 0; + while (s[offset] != 0) { + U8_NEXT(s, offset, -1, c); + + if (U8_IS_SINGLE(s[curr]) && s[curr] == ' ') uw_writec_unsafe(ctx, '+'); - else if (U8_IS_SINGLE(c) && isalnum(c)) - uw_writec_unsafe(ctx, c); - else { - sprintf(ctx->page.front, ".%02X", c); - ctx->page.front += 3; + else if (U8_IS_SINGLE(s[curr]) && isalnum(s[curr])) + uw_writec_unsafe(ctx, s[curr]); + else { + aux_urlifyChar(&(ctx->page.front), c); } + curr = offset; } return uw_unit_v; diff --git a/src/mono_opt.sml b/src/mono_opt.sml index 40b865b0..218be1ba 100644 --- a/src/mono_opt.sml +++ b/src/mono_opt.sml @@ -66,16 +66,64 @@ val htmlifyString = String.translate (fn #"<" => "<" fun htmlifySpecialChar ch = "&#" ^ Int.toString (ord ch) ^ ";" -fun hexIt ch = +fun hexPad c = let - val s = Int.fmt StringCvt.HEX (ord ch) + val s = Int.fmt StringCvt.HEX c in - case size s of + case size s of 0 => "00" | 1 => "0" ^ s | _ => s end +fun rsh a b = + Int.fromLarge (IntInf.~>>(IntInf.fromInt a, Word.fromInt b)) + +fun orb a b = + Int.fromLarge (IntInf.orb(IntInf.fromInt a, IntInf.fromInt b)) + +fun andb a b = + Int.fromLarge (IntInf.andb(IntInf.fromInt a, IntInf.fromInt b)) + + +fun hexIt ch = + let + val c = ord ch + in + if (c <= 0x7f) then + hexPad c + else + ((if (c <= 0x7fff) then + hexPad (orb (rsh c 6) 0xc0) + else + (if (c <= 0xffff) then + hexPad (orb (rsh c 12) 0xe0) + else + hexPad (orb (rsh c 18) 0xf0) + ^ hexPad (orb (andb (rsh c 12) 0x3f) 0x80) + ) + ^ hexPad (orb (andb (rsh c 6) 0x3f) 0x80)) + ) ^ hexPad (orb (andb c 0x3f) 0x80) + end + +fun urlifyCharAux ch = + case ch of + #" " => "+" + | _ => + if ord ch = 0 then + "_" + else + if Char.isAlphaNum ch then + str ch + else + "." ^ hexIt ch + +fun urlifyChar c = + case c of + #"_" => "_" ^ urlifyCharAux c + | _ => urlifyCharAux c + + fun urlifyString s = case s of "" => "_" @@ -84,11 +132,7 @@ fun urlifyString s = "_" else "") - ^ String.translate (fn #" " => "+" - | ch => if Char.isAlphaNum ch then - str ch - else - "." ^ hexIt ch) s + ^ String.translate urlifyCharAux s fun sqlifyInt n = #p_cast (Settings.currentDbms ()) (attrifyInt n, Settings.Int) @@ -349,6 +393,13 @@ fun exp e = | EWrite (EFfiApp ("Basis", "urlifyString", [e]), _) => EFfiApp ("Basis", "urlifyString_w", [e]) + | EFfiApp ("Basis", "urlifyChar", [((EPrim (Prim.Char c), _), _)]) => + EPrim (Prim.String (Prim.Normal, urlifyChar c)) + | EWrite (EFfiApp ("Basis", "urlifyChar", [((EPrim (Prim.Char c), _), _)]), loc) => + EWrite (EPrim (Prim.String (Prim.Normal, urlifyChar c)), loc) + | EWrite (EFfiApp ("Basis", "urlifyChar", [e]), _) => + EFfiApp ("Basis", "urlifyChar_w", [e]) + | EFfiApp ("Basis", "urlifyBool", [((ECon (Enum, PConFfi {con = "True", ...}, NONE), _), _)]) => EPrim (Prim.String (Prim.Normal, "1")) | EFfiApp ("Basis", "urlifyBool", [((ECon (Enum, PConFfi {con = "False", ...}, NONE), _), _)]) => -- cgit v1.2.3 From 1448db1b509db4a897a5fb11a8d35ec643d82b50 Mon Sep 17 00:00:00 2001 From: fab Date: Mon, 14 Jan 2019 23:06:29 +0000 Subject: bug fix --- src/c/urweb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/c/urweb.c b/src/c/urweb.c index e98b5772..4742bc3e 100644 --- a/src/c/urweb.c +++ b/src/c/urweb.c @@ -2004,7 +2004,7 @@ char *uw_Basis_urlifyString(uw_context ctx, uw_Basis_string s) { if (U8_IS_SINGLE(s[curr]) && s[curr] == ' ') *p++ = '+'; else if (U8_IS_SINGLE(s[curr]) && isalnum(s[curr])) - *p++ = s[offset]; + *p++ = s[curr]; else { aux_urlifyChar(&p, c); } -- cgit v1.2.3 From 0844858d23b5a0d695ad719a650e755cc21a235c Mon Sep 17 00:00:00 2001 From: fab Date: Mon, 21 Jan 2019 20:17:56 +0000 Subject: fixes for review 1 --- lib/js/urweb.js | 1 + src/mono_opt.sml | 58 ++++++++++++++++++++++++++++---------------------------- 2 files changed, 30 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/lib/js/urweb.js b/lib/js/urweb.js index 6b493c4f..357e4c1c 100644 --- a/lib/js/urweb.js +++ b/lib/js/urweb.js @@ -7,6 +7,7 @@ function needsDynPrefix() { return scripts.length == 0; } +// Codepoint implementations brought from https://norbertlindenberg.com/2012/05/ecmascript-supplementary-characters/#String if (!String.fromCodePoint) { String.fromCodePoint = function () { var chars = [], i; diff --git a/src/mono_opt.sml b/src/mono_opt.sml index 218be1ba..cc85f05b 100644 --- a/src/mono_opt.sml +++ b/src/mono_opt.sml @@ -68,9 +68,9 @@ fun htmlifySpecialChar ch = "&#" ^ Int.toString (ord ch) ^ ";" fun hexPad c = let - val s = Int.fmt StringCvt.HEX c + val s = Int.fmt StringCvt.HEX c in - case size s of + case size s of 0 => "00" | 1 => "0" ^ s | _ => s @@ -88,42 +88,42 @@ fun andb a b = fun hexIt ch = let - val c = ord ch + val c = ord ch in - if (c <= 0x7f) then - hexPad c - else - ((if (c <= 0x7fff) then - hexPad (orb (rsh c 6) 0xc0) - else - (if (c <= 0xffff) then - hexPad (orb (rsh c 12) 0xe0) - else - hexPad (orb (rsh c 18) 0xf0) - ^ hexPad (orb (andb (rsh c 12) 0x3f) 0x80) - ) - ^ hexPad (orb (andb (rsh c 6) 0x3f) 0x80)) - ) ^ hexPad (orb (andb c 0x3f) 0x80) + if (c <= 0x7f) then + hexPad c + else + ((if (c <= 0x7fff) then + hexPad (orb (rsh c 6) 0xc0) + else + (if (c <= 0xffff) then + hexPad (orb (rsh c 12) 0xe0) + else + hexPad (orb (rsh c 18) 0xf0) + ^ hexPad (orb (andb (rsh c 12) 0x3f) 0x80) + ) + ^ hexPad (orb (andb (rsh c 6) 0x3f) 0x80)) + ) ^ hexPad (orb (andb c 0x3f) 0x80) end fun urlifyCharAux ch = case ch of - #" " => "+" + #" " => "+" | _ => - if ord ch = 0 then - "_" - else - if Char.isAlphaNum ch then - str ch + if ord ch = 0 then + "_" + else + if Char.isAlphaNum ch then + str ch else - "." ^ hexIt ch - + "." ^ hexIt ch + fun urlifyChar c = case c of - #"_" => "_" ^ urlifyCharAux c + #"_" => "_" ^ urlifyCharAux c | _ => urlifyCharAux c - - + + fun urlifyString s = case s of "" => "_" @@ -399,7 +399,7 @@ fun exp e = EWrite (EPrim (Prim.String (Prim.Normal, urlifyChar c)), loc) | EWrite (EFfiApp ("Basis", "urlifyChar", [e]), _) => EFfiApp ("Basis", "urlifyChar_w", [e]) - + | EFfiApp ("Basis", "urlifyBool", [((ECon (Enum, PConFfi {con = "True", ...}, NONE), _), _)]) => EPrim (Prim.String (Prim.Normal, "1")) | EFfiApp ("Basis", "urlifyBool", [((ECon (Enum, PConFfi {con = "False", ...}, NONE), _), _)]) => -- cgit v1.2.3