summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: fab <fabrice.leal.ch@gmail.com> 2018-12-06 21:24:04 +0000
committer: fab <fabrice.leal.ch@gmail.com> 2018-12-06 21:24:04 +0000
commit: df191c8374991f65c5f5a552cfa5f4fb08fe29e8 (patch)
tree: 42f8c1d6bae638bf47e2e012dff8feba30e74463
parent: bc1547efbbad30da255b7c29973c94c8d37edabc (diff)
Characters that take more than 2 bytes are handled awkwardly by "normal" JavaScript strings. The best way to get consistent results seems to be to convert the string to an array with Array.from(...) and back to a string with .join("").
-rw-r--r-- lib/js/urweb.js 14
-rw-r--r-- src/c/urweb.c 14
-rw-r--r-- tests/utf8.ur 20
3 files changed, 36 insertions, 12 deletions
diff --git a/lib/js/urweb.js b/lib/js/urweb.js
index c7725e28..e28446e3 100644
--- a/lib/js/urweb.js
+++ b/lib/js/urweb.js
@@ -1462,9 +1462,9 @@ function s2b(s) { return s == "True" ? true : s == "False" ? false : null; }
// Strict counterpart of s2b: maps "True" to true and "False" to false; any
// other input is passed to er() inside an "Illegal Boolean ..." message.
// The message is built with "+". The former "^" is bitwise XOR in JavaScript
// (it is string concatenation only in ML/Ur), so er() used to receive an
// integer instead of the intended text.
function s2be(s) {
    if (s == "True") return true;
    if (s == "False") return false;
    return er("Illegal Boolean " + s);
}
// Identity function: hands back exactly the value it was given.
function id(x) {
    var same = x;
    return same;
}
-function sub(s, i) { return s.charAt(i); }
-function suf(s, i) { return s.substring(i); }
-function slen(s) { return s.length; }
+function sub(s, i) { return Array.from(s)[i].codePointAt(0); }
+function suf(s, i) { return Array.from(s).slice(i).join(""); }
+function slen(s) { return Array.from(s).length; }
function sidx(s, ch) {
var r = s.indexOf(ch);
if (r == -1)
@@ -1494,10 +1494,10 @@ function schr(s, ch) {
return s.substring(r);
}
function ssub(s, start, len) {
- return s.substring(start, start+len);
+ return Array.from(s).slice(start, start+len).join("");
}
function strlenGe(s, len) {
- return s.length >= len;
+ return slen(s) >= len;
}
function trimZeroes(s) {
@@ -1596,11 +1596,11 @@ function strcmp(str1, str2) {
}
function chr(n) {
- return String.fromCharCode(n);
+ return String.fromCodePoint(n);
}
function htmlifySpecialChar(ch) {
- return "&#" + ch.charCodeAt(0) + ";";
+ return "&#" + ch.codePointAt(0) + ";";
}
diff --git a/src/c/urweb.c b/src/c/urweb.c
index d622df87..1394e068 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -1594,9 +1594,13 @@ void jsifyChar(char**buffer_ptr, uw_context ctx, uw_Basis_char c1) {
buffer += offset;
}
else {
- assert(65536 > c1);
- sprintf(buffer, "\\u%04x", c1);
- buffer += 6;
+ if(65536 > c1) {
+ sprintf(buffer, "\\u%04x", c1);
+ buffer += 6;
+ } else {
+ sprintf(buffer, "\\u{%06x}", c1);
+ buffer += 10;
+ }
}
}
@@ -1608,7 +1612,7 @@ uw_Basis_string uw_Basis_jsifyString(uw_context ctx, uw_Basis_string s) {
char *r, *s2;
uw_Basis_char c;
- uw_check_heap(ctx, strlen(s) * 6 + 3);
+ uw_check_heap(ctx, strlen(s) * 10 + 3);
r = s2 = ctx->heap.front;
*s2++ = '"';
@@ -1632,7 +1636,7 @@ uw_Basis_int uw_Basis_ord(uw_context ctx, uw_Basis_char c);
uw_Basis_string uw_Basis_jsifyChar(uw_context ctx, uw_Basis_char c1) {
char *r, *s2;
- uw_check_heap(ctx, 8);
+ uw_check_heap(ctx, 10);
r = s2 = ctx->heap.front;
diff --git a/tests/utf8.ur b/tests/utf8.ur
index e7c7fd40..cf781fa9 100644
--- a/tests/utf8.ur
+++ b/tests/utf8.ur
@@ -31,6 +31,25 @@ fun test_fn_cside [a ::: Type] (_ : eq a) (_ : show a) (f : unit -> a) (expected
</active>
</xml>
+fun highencode () : transaction page =
+ return <xml>
+ <body>
+ {test_fn_cside (fn _ => strlen "𝌆𝌇𝌈𝌉") (strlen "𝌆𝌇𝌈𝌉") "high encode - strlen 1"}
+ {test_fn_cside (fn _ => strlen "𝌇𝌈𝌉") (strlen "𝌇𝌈𝌉") "high encode - strlen 2"}
+ {test_fn_cside (fn _ => strlen "𝌈𝌉") (strlen "𝌈𝌉") "high encode - strlen 3"}
+ {test_fn_cside (fn _ => strlen "𝌉") (strlen "𝌉") "high encode - strlen 4"}
+
+ {test_fn_cside (fn _ => substring "𝌆𝌇𝌈𝌉" 1 3) (substring "𝌆𝌇𝌈𝌉" 1 3) "high encode - substring 1"}
+ {test_fn_cside (fn _ => substring "𝌆𝌇𝌈𝌉" 2 2) (substring "𝌆𝌇𝌈𝌉" 2 2) "high encode - substring 2"}
+ {test_fn_cside (fn _ => substring "𝌆𝌇𝌈𝌉" 3 1) (substring "𝌆𝌇𝌈𝌉" 3 1) "high encode - substring 3"}
+
+ {test_fn_cside (fn _ => strlen (substring "𝌆𝌇𝌈𝌉" 1 3)) (strlen (substring "𝌆𝌇𝌈𝌉" 1 3)) "high encode - strlen of substring 1"}
+ {test_fn_cside (fn _ => strlen (substring "𝌆𝌇𝌈𝌉" 2 2)) (strlen (substring "𝌆𝌇𝌈𝌉" 2 2)) "high encode - strlen of substring 2"}
+ {test_fn_cside (fn _ => strlen (substring "𝌆𝌇𝌈𝌉" 3 1)) (strlen (substring "𝌆𝌇𝌈𝌉" 3 1)) "high encode - strlen of substring 3"}
+
+ </body>
+ </xml>
+
fun substrings () : transaction page =
return <xml>
<body>
@@ -510,6 +529,7 @@ fun index () : transaction page =
<a link={touppers ()}>touppers</a>
<a link={ord_and_chrs ()}>ord_and_chrs</a>
<a link={test_ords ()}>test ord</a>
+ <a link={highencode ()}>highencode</a>
<a link={test_db ()}>test_db</a>
</body>
</xml>