summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Adam Chlipala <adam@chlipala.net>, 2019-09-25 19:54:59 -0400
committer: Adam Chlipala <adam@chlipala.net>, 2019-09-25 19:54:59 -0400
commit: 8728f397bee2b567611dcd7a7c359c7e92159c1c (patch)
tree: 283c2b659f5b9a4ed6ef097ba4b0f2052d966c28
parent: c388a91762e9dd9aef2eb097963af020c0e452f8 (diff)
Unicode escapes in JSON
-rw-r--r-- include/urweb/urweb_cpp.h | 1
-rw-r--r-- lib/ur/basis.urs | 1
-rw-r--r-- lib/ur/json.ur | 29
-rw-r--r-- src/c/urweb.c | 12
4 files changed, 43 insertions, 0 deletions
diff --git a/include/urweb/urweb_cpp.h b/include/urweb/urweb_cpp.h
index dcf67fef..e4ad6e61 100644
--- a/include/urweb/urweb_cpp.h
+++ b/include/urweb/urweb_cpp.h
@@ -166,6 +166,7 @@ uw_Basis_string uw_Basis_strchr(struct uw_context *, const char *, uw_Basis_char
uw_Basis_int uw_Basis_strcspn(struct uw_context *, const char *, const char *);
uw_Basis_string uw_Basis_substring(struct uw_context *, const char *, uw_Basis_int, uw_Basis_int);
uw_Basis_string uw_Basis_str1(struct uw_context *, uw_Basis_char);
+uw_Basis_string uw_Basis_ofUnicode(struct uw_context *, uw_Basis_int);
uw_Basis_string uw_strdup(struct uw_context *, const char *);
uw_Basis_string uw_maybe_strdup(struct uw_context *, const char *);
diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs
index 2a98bf6f..d29bf6e6 100644
--- a/lib/ur/basis.urs
+++ b/lib/ur/basis.urs
@@ -95,6 +95,7 @@ val strsindex : string -> string -> option int
val strcspn : string -> string -> int
val substring : string -> int -> int -> string
val str1 : char -> string
+val ofUnicode : int -> string
class show
val show : t ::: Type -> show t -> t -> string
diff --git a/lib/ur/json.ur b/lib/ur/json.ur
index 05406739..70f0c797 100644
--- a/lib/ur/json.ur
+++ b/lib/ur/json.ur
@@ -59,6 +59,17 @@ fun escape s =
"\"" ^ esc s
end
+(* Convert a single hexadecimal digit character to its numeric value.
+ * Decimal digits map to 0-9 and the letters a-f / A-F map to 10-15.
+ * Any other character aborts with an "Invalid hexadecimal digit" error. *)
+fun unhex ch =
+    let
+        val code = Char.toInt ch
+    in
+        if Char.isDigit ch then
+            code - Char.toInt #"0"
+        else if not (Char.isXdigit ch) then
+            error <xml>Invalid hexadecimal digit "{[ch]}"</xml>
+        else if Char.isUpper ch then
+            (* Upper-case letter digit: offset from "A", which stands for 10. *)
+            code - Char.toInt #"A" + 10
+        else
+            (* Lower-case letter digit: offset from "a", which stands for 10. *)
+            code - Char.toInt #"a" + 10
+    end
+
fun unescape s =
let
val len = String.length s
@@ -75,6 +86,11 @@ fun unescape s =
| #"\\" =>
if i+1 >= len then
error <xml>JSON unescape: Bad escape sequence: {[s]}</xml>
+ else if String.sub s (i + 1) = #"u" then
+ if i+5 >= len then
+ error <xml>JSON unescape: Bad escape sequence: {[s]}</xml>
+ else
+ findEnd (i+6)
else
findEnd (i+2)
| _ => findEnd (i+1)
@@ -93,6 +109,19 @@ fun unescape s =
#"\\" =>
if i+1 >= len then
error <xml>JSON unescape: Bad escape sequence: {[s]}</xml>
+ else if String.sub s (i+1) = #"u" then
+ if i+5 >= len then
+ error <xml>JSON unescape: Unicode ends early</xml>
+ else
+ let
+ val n =
+ unhex (String.sub s (i+2)) * (256*16)
+ + unhex (String.sub s (i+3)) * 256
+ + unhex (String.sub s (i+4)) * 16
+ + unhex (String.sub s (i+5))
+ in
+ ofUnicode n ^ unesc (i+6)
+ end
else
(case String.sub s (i+1) of
#"n" => "\n"
diff --git a/src/c/urweb.c b/src/c/urweb.c
index af929269..8c445f39 100644
--- a/src/c/urweb.c
+++ b/src/c/urweb.c
@@ -2724,6 +2724,18 @@ uw_Basis_string uw_Basis_str1(uw_context ctx, uw_Basis_char ch) {
return r;
}
+/*
+ * Convert the single UTF-16 code unit n into a NUL-terminated UTF-8 string
+ * allocated with uw_malloc on ctx.
+ *
+ * n must fit in one UChar (0..0xFFFF); values outside that range are rejected
+ * instead of being silently truncated.  Conversion failures reported by ICU
+ * (for example an unpaired surrogate) are raised through uw_error as FATAL.
+ */
+uw_Basis_string uw_Basis_ofUnicode(uw_context ctx, uw_Basis_int n) {
+  /* One UTF-16 code unit expands to at most 3 UTF-8 bytes; the 4th byte holds
+     the terminating NUL that callers of a uw_Basis_string rely on. */
+  const int32_t cap = 4;
+  UChar buf16[1];
+  uw_Basis_string out;
+  int32_t outLen = 0;
+  UErrorCode pErrorCode = U_ZERO_ERROR;
+
+  if (n < 0 || n > 0xFFFF)
+    uw_error(ctx, FATAL, "Unicode code unit out of range: %lld", (long long)n);
+
+  buf16[0] = (UChar)n;
+  out = uw_malloc(ctx, cap);
+
+  /* outLen >= cap would mean ICU had no room left for the terminator. */
+  if (u_strToUTF8(out, cap, &outLen, buf16, 1, &pErrorCode) == NULL
+      || U_FAILURE(pErrorCode) || outLen == 0 || outLen >= cap)
+    uw_error(ctx, FATAL, "Bad Unicode string to unescape (error %s)", u_errorName(pErrorCode));
+
+  /* Terminate explicitly rather than depending on ICU's warning-only behaviour. */
+  out[outLen] = 0;
+
+  return out;
+}
+
uw_Basis_string uw_strdup(uw_context ctx, uw_Basis_string s1) {
int len = strlen(s1) + 1;
char *s;